From 5814cb1cfe1cdebf0c3f700a5db73a844a478c54 Mon Sep 17 00:00:00 2001 From: Thomas Kuschel Date: Mon, 17 Jun 2024 19:16:28 +0200 Subject: [PATCH] ADD typo fixing file --- afu/.callbook_club | 8 +- afu/.gender | 528 +++++++++++++++++++++++++++++++++++++++++++++ afu/.typo_callbook | 7 + afu/README.md | 2 +- afu/callbook.py | 214 +++++++++++++++--- 5 files changed, 726 insertions(+), 33 deletions(-) create mode 100644 afu/.gender create mode 100644 afu/.typo_callbook diff --git a/afu/.callbook_club b/afu/.callbook_club index 13ae058..e739594 100644 --- a/afu/.callbook_club +++ b/afu/.callbook_club @@ -2,13 +2,17 @@ Landesverband Wien im Österreichischen Versuchssenderverband Amateurfunkverein des Österreichischen Bundesheeres - Austrian Military Radio Society Radio-Amateur-Klub der Technischen Universität Wien - Radio Amateur Club of the TU Wien Landesverband Tirol des Österreichischen Versuchssenderverbands -Österreichisches Rotes Kreuz, Landesverband Vorarlberg -Kulturverein der österreichischen Eisenbahner - Sektion Amateurfunk +Kulturverein der österreichischen Eisenbahner, Sektion Amateurfunk Höhere Technische Bundeslehr- und Versuchsanstalt Innsbruck Anichstraße Österreichisches Rotes Kreuz, Landesverband Tirol +Österreichisches Rotes Kreuz, Landesverband Vorarlberg +Österreichisches Rotes Kreuz, Landesverband Steiermark Johanniter Tirol Rettungs- und Einsatzdienste mildtätige GmbH Österreichischer Versuchssenderverband - Dachverband Landesverband Niederösterreich des Österreichischen Versuchssenderverbands Amateurfunkverband Salzburg - Landesverband des Österreichischen Versuchssenderverbandes "OAFV" des ÖVSV, Ortsgruppe Ried - Grieskirchen OÖ Amateurfunkverband, Ortsgruppe Ried-Grieskirchen +OE3XHT - Amateurfunkverein an der HTL St. Pölten +Stadtgemeinde Feldkirch Risiko- und Katastrophen-Schutz +IPA,LANDESGRUPPE VBG., z.Hd.Herrn Longhi Harald OE9HLH diff --git a/afu/.gender b/afu/.gender new file mode 100644 index 0000000..9c5a2f6 --- /dev/null +++ b/afu/.gender @@ -0,0 +1,528 @@ +m Abdul +m Abdull +m Achaz +m Adalbert +m Adalberto +m Alastair +m Albin +m Albrecht +m Aleksandar +m Aleksander +m Aleksey +m Alexander +m Alfons +m Alfger +m Aljoscha +m Aljosha +m Alois +m Alvaro +m András +m Andràs +m Andreas +m Andrej +m Ansgar +m Anton +m Archibald +m Arkadiusz +m Armin +m Arno +m Arnold +m Arnulf +m Arpad +m Arthur +m Artur +m Attila +m Augustinus +m Avdija +m Baldur +m Bartlmä +m Benedict +m Benedikt +m Benjamin +m Bernard +m Bernhard +m Berthold +m Bertram +m Bernd +m Bevan +m Bogoljub +m Boguslaw +m Bohumil +m Branko +m Brenden +m Bruno +m Burghard +m Burkhard +m Burkhart +m Camillo +m Carl +m Carlo +m Carlos +m Cezar-Iuliu +m Charles +m Christian +m Christian-Andrei +m Christian-Erich +m Christof +m Christoph +m Christopher +m Chungil +m Claudio +m Claus +m Clemens +m Cornelius +m Craig +m Cyrill +m Daniel +m David +m Denis +m Detlef +m Dietmar +m Domenik +m Dominic +m Dominik +m Dominique +m Dragan +m Guenter +m Eckart +m Eduard +m Edward +m Egidius +m Emmerich +m Engelbert +m Enrico +m Ernest +m Ernst +m Erwin +m Eugen +m Fabian +m Fabio +m Felix +m Ferdinand +m Filip-Jan +m Florian +m Franc +m Francesco +m Frank +m Franz +m Franz-Joseph +m Frederick +m Frederikus +m Fredy +m Friedmann +m Friedrich +m Fridolin +m Fritz +m Frohwald +m Gabriel +m Gavril +m Gebhard +m Georg +m Gerfried +m Gerhard +m Gernod +m Gernot +m Gerold +m Giulio +m Gisbert +m Gottfried +m Gottlieb +m Gregor +m Gregor-Emanuel +m Guenther +m Guido +m Gunnar +m Günther +m Guntram +m Gustav +m Gustav-Josef +m Hanno +m Hanns +m Hanns-Michael +m Hans +m Hans-Christian +m Hans-Christoph +m Hans-Ewald +m Hans-Joachim +m Hans-Jörg +m Hans-Jürgen +m Hans-Otto +m Hans-Peter +m Hans-Werner +m Hansjoerg +m Hansjörg +m Harald +m Harald-Thomas +m Harry +m Hartmut +m Hartwig +m Heimo +m Heiner-Anton +m Heinrich +m Heinz +m Hellmut +m Hellmuth +m Helmut +m Helmuth +m Henning +m Henryk +m Heribert +m Hermann +m Herolind +m Herwig +m Holger +m Horst +m Hubert +m Ingo +m Ingulf +m Ivaylo +m Izudin +m Jacob +m Jakob +m Jan +m Jann-Steffen +m Jochen +m Joerg +m Jonas +m Jonathan +m Josef +m Josef-Manfred +m Joseph-Maria +m Jozef +m Josip +m Johann +m Johannes +m Julian +m Julius +m Junichi +m Jürgen +m Karl-Heinz +m Karl-Otto +m Karl-Thomas +m Karlheinz +m Katarina +m Kevin +m Klaus +m Klaus-Dieter +m Klaus-Jürgen +m Klemens +m Konrad +m Konstantin +m Krzysztof +m Laurence OE6LUN +m Laurenz +m Laurin +m Laszlo +m Leonel +m Leonhard +m Leopold +m Liam +m Lothar +m Lucas +m Luciano +m Ludwig +m Lukas +m Manfred +m Manuel +m Marcel +m Marcin +m Marco +m Marcus +m Mario +m Mario-Rafael +m Marius +m Marko +m Markus +m Martin +m Marzell +m Matija +m Matteo +m Matteo-Alessandro +m Matthäus +m Matthew +m Matthias +m Maxim +m Maximilian +m Meinrad +m Meletios +m Michael +m Mihaly +m Mikhail +m Mirian +m Mirijan +m Miroslav +m Monty +m Muhammed +m Murat +m Nanak +m Nicolai +m Nicolas +m Niels-Henrik +m Niklas +m Nikolas +m Mikolaus +m Noah +m Norbert +m Ole-Christian +m Oliver +m Oliver-Helmut +m Ortwin +m Othmar +m Ovidiu +m Ovidiu-Dan +m Patrick +m Patrik +m Paul +m Paulino +m Peter +m Peter-Ernst +m Peter-Holger +m Peter-Philipp +m Petros +m Philemon +m Philipp +m Piotr +m Primoz +m Radovan +m Raimund +m Rainer +m Raffael +m Raffi +m Raoul +m Raphael +m Ralph +m Reinald +m Reinhard +m Reinhart +m Rembert +m Riccardo +m Richard +m Reinhold +m René +m René-Lysander +m Roland +m Rolf-Dietrich +m Romain +m Roman +m Ronald +m Rüdiger +m Rudolf +m Rupert +m Samuel +m Sándor +m Sandro +m Santiago +m Sebastian +m Seong +m Severin +m Siegfried +m Siegmar +m Simon +m Stefan +m Stefano +m Steffen +m Stelian-Gabriel +m Stelio +m Stephan +m Stephen +m Subagio-Rasidi +m Sven-Erik +m Tamim +m Theodor +m Thomas +m Thomas-Michael +m Thorsten +m Tillmann +m Tobias +m Tomislav +m Tommaso +m Tonny +m Ümmet +m Urban +m Valentin +m Valerian +m Viktor +m Viorel +m Vjekoslav +m Vladimir +m Volker +m Waldemar +m Walter +m Werner +m Wieland +m Wigbert +m Willibald +m Wilfried +m Wilfrid +m Wilhelm +m Wolf-Dieter +m Wolfred +m Winfried +m Wolfgang +m Wolfram +m Yannic +m Zeljko +m Zlatko +m Zvonko +w Adelheid +w Alexandra +w Andrea +w Angela +w Angelika +w Anita +w Anna +w Anna-Maria +w Anneliese +w Annemarie +w Astrid +w Auguste +w Barbara +w Beatrice +w Beatrix +w Bernadette +w Bernardine +w Bettina +w Bianca +w Birgit +w Brigitte +w Britta +w Carmen +w Chiara +w Christa +w Christine +w Christl +w Cornelia +w Durdica +w Edeltraud +w Elfriede +w Elisabeth +w Elke +w Erdmuthe +w Ernestine +w Eva-Maria +w Eveline +w Evelyn +w Flora +w Franziska +w Frederike +w Frieda +w Friederike +w Gabriela +w Gabriella +w Gabriele +w Gerda +w Gerlinde +w Gertraude +w Gertrude +w Gisela +w Gudrun +w Gunhild +w Gustav +w Hannelore +w Heidelinde +w Heidi +w Heidrun +w Helga +w Hemma +w Hermine +w Herta +w Hildegard +w Ingeborg +w Ingeburg +w Ingrid +w Isabel +w Isabella +w Isolde +w Janet +w Jasmin +w Jemilla-Katalin +w Jessica +w Johanna +w Josefine +w Julia +w Juliana +w Jutta +w Karin +w Karolina +w Karoline +w Katharina +w Kathrin +w Katja +w Katrin +w Kerstin +w Klaudia +w Laila +w Larissa +w Leonie +w Lieselotte +w Ligia +w Lisbeth +w Lygia +w Luisa +w Luiza +w Magdalena +w Manfreda +w Manuela +w Margareta +w Margarethe +w Margot +w Margret +w Marianne +w Marie-Luise +w Marina +w Marion +w Marlene +w Martha +w Martina +w Mathilde +w Mechthild +w Melanie +w Michaela +w Nadine +w Natasa +w Natascha +w Nicole +w Nikolitsa +w Nina +w Noriko +w Olivia +w Patrizia +w Paulina +w Pauline +w Phaedra +w Regina +w Reinhilde +w Renate +w Renee +w Rosina +w Roswitha +w Sabine +w Sandra +w Senada +w Ricarda +w Sieglinde +w Silvia +w Simone +w Solveig +w Sonja +w Sophia +w Sophie +w Stefanie +w Steffi +w Stephanie +w Susanne +w Sybille +w Tadeja +w Tamara +w Tanja +w Tatjana +w Theresia +w Ulrike +w Ursula +w Valerie +w Valery +w Veronika +w Victoria +w Waldtraud +w Waltraud +w Yvonne \ No newline at end of file diff --git a/afu/.typo_callbook b/afu/.typo_callbook new file mode 100644 index 0000000..0e82b55 --- /dev/null +++ b/afu/.typo_callbook @@ -0,0 +1,7 @@ +# TYPO CALLBOOK - 2024-06-17 Version 1.0.0 +# X ... Nachname mit Vorname(n) vertauscht +# F ... Vorname(n) falsch geschrieben +# You have to write the values exactly under the titles Nachname, Vorname +* Rufz Nachname Vorname +F OE1CGC Gasser Christoph +X OE5ENN Kolmhofer Erich diff --git a/afu/README.md b/afu/README.md index d383286..2acd99b 100644 --- a/afu/README.md +++ b/afu/README.md @@ -228,4 +228,4 @@ Clone the repository `script` to your site with: $ yay -S python-mysql-connector Hint: At the moment the compilation fails. Will be updated soon. - See https://jira.mariadb.org/projects/CONPY/issues/CONPY-284 \ No newline at end of file + See https://jira.mariadb.org/projects/CONPY/issues/CONPY-284 (2024-06-16) \ No newline at end of file diff --git a/afu/callbook.py b/afu/callbook.py index e6ba3f1..7d9dcb5 100755 --- a/afu/callbook.py +++ b/afu/callbook.py @@ -33,13 +33,16 @@ def call_parser(): parser.add_argument('-V', '--version', action='version', version='{} {}'.format(os.path.split(__file__)[1],__version__)) parser.add_argument('-v', '--verbose', action='append_const', const = 1) parser.add_argument('-p', '--path', default='Rufzeichenliste_AT_Stand_010624.pdf', help= 'skip the download if the specified path to a PDF file exists') + # parser.add_argument('-t', '--type', default='' , help='specify the output, supported types are [ CSV | JSON ]') # not implemented yet + parser.add_argument('-o', '--output', default='', help='specify the file where the data are written to, default stdout') + parser.add_argument('-m', '--mariadb', help='SQL interface to MariaDB (MySql) format ": " or defined in .config') parser.add_argument('url', metavar='URL', nargs='?', default=__website__) opt = parser.parse_args() opt.verbose = 0 if opt.verbose is None else sum(opt.verbose) return opt -def call_website(url,verbose,path='',interactive=False): +def call_website(url,verbose,path='',interactive=False,output='',mariadb=''): if path: if os.path.exists(path): @@ -78,7 +81,7 @@ def call_website(url,verbose,path='',interactive=False): if(interactive): time.sleep(300) else: - time.sleep(5) + time.sleep(4) driver.close() return os.path.basename(href) @@ -91,29 +94,31 @@ def remove_first_quote_if_odd(text, verbose = 0): # Find and remove the first quote for i, char in enumerate(text): if char in ['"']: # ['"', "'"]: - if (verbose > 0): + if (verbose > 1): print(text) text = text[:i] + text[i+1:] - if (verbose > 0): + if (verbose > 1): print(text) break return text def is_clubstation(call): assert(len(call) > 3) - if call[3].upper() == 'X': + if call[3].upper() == 'X' or call.upper() == 'OE5SIX': # special case with OE5SIX (Clubstation) return True + return False + def replace_substring_with_line(path, search_substring, verbose=0): try: - with open(path, 'r') as file: - lines = file.readlines() - - search_substring - for line in lines: - if search_substring[0:40].lower() in line.lower(): + if not replace_substring_with_line.lines: + with open(path, 'r') as file: + replace_substring_with_line.lines = file.readlines() + + for line in replace_substring_with_line.lines: + if search_substring[0:46].lower() in line.lower(): modified_line = line.strip() # Replace the substring with the whole line ## line = line.lower().replace(search_substring.lower(), modified_line) @@ -127,6 +132,153 @@ def replace_substring_with_line(path, search_substring, verbose=0): return search_substring +replace_substring_with_line.lines = None + +def gender_substring(path, search_substring, verbose=0): + try: + if not gender_substring.lines: + with open(path, 'r') as file: + gender_substring.lines = file.readlines() + + for line in gender_substring.lines: + if search_substring[2:].lower() in line.lower(): + return line[0] + except FileNotFoundError: + print(f'The file {path} was not found.') + except Exception as e: + print(f'An error occurred: {e}') + + return 'x' # not found, unknown gender + +gender_substring.lines = None + +def get_gender(firstnames, surname, call, verbose=0): + + # load the .gender file: + genderfile = '.gender' + gender = 'x' + gpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), genderfile) + if os.path.exists(gpath): + firstname = firstnames.split(' ', 1)[0] + gender = gender_substring(gpath, firstname, verbose=0) # only check 1st/firstname of name, important when there are more than 1 firstnames + if gender == 'x': + if verbose > 0: + get_gender.cnt += 1 + print(f'({get_gender.cnt}){call} "{firstname}" [{firstnames} {surname}] not found in file {genderfile} - gender "x" is set.') + return gender + +get_gender.cnt = 0 + +def call_split_name(fullname, call, verbose): + + assert(len(fullname) > 1) + + name = fullname.split(' ', 1) + surname = name[0] + # several special cases like surname "de Lijezer", "van Dijk", "el Shamaa", etc. + match surname.lower(): + case 'de' | 'el': + name = fullname[3:].split(' ',1) + surname = surname.lower() + ' ' + name[0] + if verbose > 0: + print(f'## {fullname} --> {surname} ##') + + case 'van' | 'von' : + name = fullname[4:].split(' ',1) + surname = surname.lower() + ' ' + name[0] + if surname.lower() in ['van der', 'von der', 'van den']: # e.g. "van der Meulen", "Walther von der Vogelweide", "Annie van den Berg" + name = fullname[8:].split(' ',1) + surname = surname.lower() + ' ' + name[0] + if verbose > 0: + print(f'## {fullname} --> {surname} ##') + case 'della' : # Ancient Italian noble family "della Rowere" + name = fullname[6:].split(' ',1) + surname = surname.lower() + ' ' + name[0] + if verbose > 0: + print(f'## {fullname} --> {surname} ##') + + if len(name) > 1: + firstname = name[1] + else: + firstname = '' + + # In Austria the call suffix starting with Y is an YL (young lady) +# if call[3].upper() == 'Y': + if False: + gender = 'f' + else: + gender = get_gender(firstname, surname, call, verbose) + + return firstname, surname, gender + +def fix_typo(call, fullname, verbose=1): + fixtypofile = '.typo_callbook' + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), fixtypofile) + if os.path.exists(path): + try: + if not fix_typo.lines: + with open(path, 'r') as file: + fix_typo.lines = file.readlines() + + for line in fix_typo.lines[4:]: # starting with line 4 + if len(fix_typo.spaces) == 0: # not initialized + if line[0] == '*': + words = line.split() + assert len(words) == 4 # i.e. '*, call, nachname, vorname' + fix_typo.spaces = [line.index(words[1]), line.index(words[2]), line.index(words[3])] + else: + if call in line[2:8]: + print(f'Call: {call} found') + match line[0]: + case '#': + if verbose > 1: + print(line.rstrip()) + case 'F': + if verbose > 0: + print(fullname) + print(line.rstrip()) + firstname1, surname1, gender1 = call_split_name(fullname, call, 0) + fullname2 = line[fix_typo.spaces[1]:fix_typo.spaces[2]-1].rstrip() + ' ' + line[fix_typo.spaces[2]:-1] + if verbose > 0: + print(fullname2) + firstname2, surname2, gender2 = call_split_name(fullname2, call, 0) + # Hardening: at a minimum, either the firstnames or the surenames must fit + fix_cnt = 0 + if (firstname1 != firstname2): + fix_cnt += 1 + if (surname1 != surname2): + fix_cnt += 1 + if fix_cnt == 0: + print(f'It is fixed! You can remove the line with the item {call} from the file {fixtypofile}!') + elif fix_cnt > 1: + print(f'Something went wrong, there are several bugs. Check line with call {call} in file {fixtypofile}!') + else: + fullname = fullname2 + case 'X': # exchange the surname with firstname + if verbose > 0: + print(fullname) + print(line.rstrip()) + firstname1, surname1, gender1 = call_split_name(fullname, call, 0) + fullname2 = line[fix_typo.spaces[1]:fix_typo.spaces[2]-1].rstrip() + ' ' + line[fix_typo.spaces[2]:-1] + if verbose > 0: + print(fullname2) + firstname2, surname2, gender2 = call_split_name(fullname2, call, 0) + fix_cnt = 0 + if (firstname1 == firstname2) and (surname1 == surname2): + print(f'It is fixed! You can remove the line with the item {call} from the file {fixtypofile}!') + elif (firstname1 != surname2) or (surname1 != firstname2): + print(f'Something went wrong, there are several bugs. Check line with call {call} in file {fixtypofile}!') + else: + fullname = fullname2 + + except FileNotFoundError: + print(f'The file {path} was not found.') + except Exception as e: + print(f'An error occurred: {e}') + + return fullname +fix_typo.lines = None +fix_typo.spaces = [] def call_data_record(line, mod_date, verbose): @@ -153,31 +305,33 @@ def call_data_record(line, mod_date, verbose): match = re.search(r'^(OE[0-9][A-Z]{1,3})', call) assert(match.string == call) fullname = records[1] + location = records[2] + address = records[3] + permit_class = records[4] + fullname = fix_typo(call, fullname, verbose) # If there is a clubstation if is_clubstation(call): # Name starting with only one quotation marks e.g. " -- remove that one: fullname = remove_first_quote_if_odd(fullname, verbose) # only found @ clubstations clubstationfile = '.callbook_club' - if verbose > 0: + if verbose > 1: print(f'Call: {call}, Name: {fullname}') path = os.path.join(os.path.dirname(os.path.abspath(__file__)), clubstationfile) if os.path.exists(path): fullname = replace_substring_with_line(path, fullname, verbose) - if verbose > 0: - print(f'Call: {call}, Name: {fullname}') - + gender = '*' + elif fullname[0] == '*': + gender = '*' + else: # Try to split the YL or OMs Name, guess the gender + firstname, surname, gender = call_split_name(fullname, call, verbose) + if verbose > 1: + if gender == '*': + print(f'Call: {call}, Name: {fullname}, Gender: {gender}') + else: + print(f'Call: {call}, First Name: {firstname}, Surname: {surname}, Gender: {gender}') + print(f'Location: {location}, Address: {address}, Permit: {permit_class}') -# if not record: -# return -# if verbose == 1: -# print(record.group(1)) -# if verbose >= 3: -# print(f'Call: {record.group(1)}') -# print(f'Name: {record.group(2)}') -# #print(f'Location: {record[3]}') -# #print(f'Address: {record[4]}') -# #print(f'Permit Class: {record[5]}') def call_analyse_pdf(file, verbose): @@ -196,13 +350,13 @@ def call_analyse_pdf(file, verbose): print(f' Title: {meta.title}') print(f' Created: {meta.creation_date}') print(f'Modified: {meta.modification_date}') - + for page in reader.pages: page_text = page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False) lines = page_text.strip().splitlines() for line in lines[3:-2]: line = line.strip() - # calls = re.findall(r' +(OE[0-9][A-Z]{1,3}).*$', page_text) + if verbose >= 2: print(line) call_data_record(line, meta.modification_date,verbose) @@ -210,11 +364,11 @@ def call_analyse_pdf(file, verbose): if __name__ == '__main__': # call_description() args = call_parser() - # filename = 'Rufzeichenliste_AT_Stand_010624.pdf' + try: filename = call_website(**vars(args)) - - print(f'Filename: {filename}') + if args.verbose > 1: + print(f'Filename: {filename}') call_analyse_pdf(filename,args.verbose) sys.exit(0) except Exception as e: