ADD typo fixing file

This commit is contained in:
Thomas Kuschel 2024-06-17 19:16:28 +02:00
parent ad1a7c4507
commit 5814cb1cfe
5 changed files with 726 additions and 33 deletions

View File

@ -2,13 +2,17 @@ Landesverband Wien im Österreichischen Versuchssenderverband
Amateurfunkverein des Österreichischen Bundesheeres - Austrian Military Radio Society Amateurfunkverein des Österreichischen Bundesheeres - Austrian Military Radio Society
Radio-Amateur-Klub der Technischen Universität Wien - Radio Amateur Club of the TU Wien Radio-Amateur-Klub der Technischen Universität Wien - Radio Amateur Club of the TU Wien
Landesverband Tirol des Österreichischen Versuchssenderverbands Landesverband Tirol des Österreichischen Versuchssenderverbands
Österreichisches Rotes Kreuz, Landesverband Vorarlberg Kulturverein der österreichischen Eisenbahner, Sektion Amateurfunk
Kulturverein der österreichischen Eisenbahner - Sektion Amateurfunk
Höhere Technische Bundeslehr- und Versuchsanstalt Innsbruck Anichstraße Höhere Technische Bundeslehr- und Versuchsanstalt Innsbruck Anichstraße
Österreichisches Rotes Kreuz, Landesverband Tirol Österreichisches Rotes Kreuz, Landesverband Tirol
Österreichisches Rotes Kreuz, Landesverband Vorarlberg
Österreichisches Rotes Kreuz, Landesverband Steiermark
Johanniter Tirol Rettungs- und Einsatzdienste mildtätige GmbH Johanniter Tirol Rettungs- und Einsatzdienste mildtätige GmbH
Österreichischer Versuchssenderverband - Dachverband Österreichischer Versuchssenderverband - Dachverband
Landesverband Niederösterreich des Österreichischen Versuchssenderverbands Landesverband Niederösterreich des Österreichischen Versuchssenderverbands
Amateurfunkverband Salzburg - Landesverband des Österreichischen Versuchssenderverbandes Amateurfunkverband Salzburg - Landesverband des Österreichischen Versuchssenderverbandes
"OAFV" des ÖVSV, Ortsgruppe Ried - Grieskirchen "OAFV" des ÖVSV, Ortsgruppe Ried - Grieskirchen
OÖ Amateurfunkverband, Ortsgruppe Ried-Grieskirchen OÖ Amateurfunkverband, Ortsgruppe Ried-Grieskirchen
OE3XHT - Amateurfunkverein an der HTL St. Pölten
Stadtgemeinde Feldkirch Risiko- und Katastrophen-Schutz
IPA,LANDESGRUPPE VBG., z.Hd.Herrn Longhi Harald OE9HLH

528
afu/.gender Normal file
View File

@ -0,0 +1,528 @@
m Abdul
m Abdull
m Achaz
m Adalbert
m Adalberto
m Alastair
m Albin
m Albrecht
m Aleksandar
m Aleksander
m Aleksey
m Alexander
m Alfons
m Alfger
m Aljoscha
m Aljosha
m Alois
m Alvaro
m András
m Andràs
m Andreas
m Andrej
m Ansgar
m Anton
m Archibald
m Arkadiusz
m Armin
m Arno
m Arnold
m Arnulf
m Arpad
m Arthur
m Artur
m Attila
m Augustinus
m Avdija
m Baldur
m Bartlmä
m Benedict
m Benedikt
m Benjamin
m Bernard
m Bernhard
m Berthold
m Bertram
m Bernd
m Bevan
m Bogoljub
m Boguslaw
m Bohumil
m Branko
m Brenden
m Bruno
m Burghard
m Burkhard
m Burkhart
m Camillo
m Carl
m Carlo
m Carlos
m Cezar-Iuliu
m Charles
m Christian
m Christian-Andrei
m Christian-Erich
m Christof
m Christoph
m Christopher
m Chungil
m Claudio
m Claus
m Clemens
m Cornelius
m Craig
m Cyrill
m Daniel
m David
m Denis
m Detlef
m Dietmar
m Domenik
m Dominic
m Dominik
m Dominique
m Dragan
m Guenter
m Eckart
m Eduard
m Edward
m Egidius
m Emmerich
m Engelbert
m Enrico
m Ernest
m Ernst
m Erwin
m Eugen
m Fabian
m Fabio
m Felix
m Ferdinand
m Filip-Jan
m Florian
m Franc
m Francesco
m Frank
m Franz
m Franz-Joseph
m Frederick
m Frederikus
m Fredy
m Friedmann
m Friedrich
m Fridolin
m Fritz
m Frohwald
m Gabriel
m Gavril
m Gebhard
m Georg
m Gerfried
m Gerhard
m Gernod
m Gernot
m Gerold
m Giulio
m Gisbert
m Gottfried
m Gottlieb
m Gregor
m Gregor-Emanuel
m Guenther
m Guido
m Gunnar
m Günther
m Guntram
m Gustav
m Gustav-Josef
m Hanno
m Hanns
m Hanns-Michael
m Hans
m Hans-Christian
m Hans-Christoph
m Hans-Ewald
m Hans-Joachim
m Hans-Jörg
m Hans-Jürgen
m Hans-Otto
m Hans-Peter
m Hans-Werner
m Hansjoerg
m Hansjörg
m Harald
m Harald-Thomas
m Harry
m Hartmut
m Hartwig
m Heimo
m Heiner-Anton
m Heinrich
m Heinz
m Hellmut
m Hellmuth
m Helmut
m Helmuth
m Henning
m Henryk
m Heribert
m Hermann
m Herolind
m Herwig
m Holger
m Horst
m Hubert
m Ingo
m Ingulf
m Ivaylo
m Izudin
m Jacob
m Jakob
m Jan
m Jann-Steffen
m Jochen
m Joerg
m Jonas
m Jonathan
m Josef
m Josef-Manfred
m Joseph-Maria
m Jozef
m Josip
m Johann
m Johannes
m Julian
m Julius
m Junichi
m Jürgen
m Karl-Heinz
m Karl-Otto
m Karl-Thomas
m Karlheinz
m Katarina
m Kevin
m Klaus
m Klaus-Dieter
m Klaus-Jürgen
m Klemens
m Konrad
m Konstantin
m Krzysztof
m Laurence OE6LUN
m Laurenz
m Laurin
m Laszlo
m Leonel
m Leonhard
m Leopold
m Liam
m Lothar
m Lucas
m Luciano
m Ludwig
m Lukas
m Manfred
m Manuel
m Marcel
m Marcin
m Marco
m Marcus
m Mario
m Mario-Rafael
m Marius
m Marko
m Markus
m Martin
m Marzell
m Matija
m Matteo
m Matteo-Alessandro
m Matthäus
m Matthew
m Matthias
m Maxim
m Maximilian
m Meinrad
m Meletios
m Michael
m Mihaly
m Mikhail
m Mirian
m Mirijan
m Miroslav
m Monty
m Muhammed
m Murat
m Nanak
m Nicolai
m Nicolas
m Niels-Henrik
m Niklas
m Nikolas
m Mikolaus
m Noah
m Norbert
m Ole-Christian
m Oliver
m Oliver-Helmut
m Ortwin
m Othmar
m Ovidiu
m Ovidiu-Dan
m Patrick
m Patrik
m Paul
m Paulino
m Peter
m Peter-Ernst
m Peter-Holger
m Peter-Philipp
m Petros
m Philemon
m Philipp
m Piotr
m Primoz
m Radovan
m Raimund
m Rainer
m Raffael
m Raffi
m Raoul
m Raphael
m Ralph
m Reinald
m Reinhard
m Reinhart
m Rembert
m Riccardo
m Richard
m Reinhold
m René
m René-Lysander
m Roland
m Rolf-Dietrich
m Romain
m Roman
m Ronald
m Rüdiger
m Rudolf
m Rupert
m Samuel
m Sándor
m Sandro
m Santiago
m Sebastian
m Seong
m Severin
m Siegfried
m Siegmar
m Simon
m Stefan
m Stefano
m Steffen
m Stelian-Gabriel
m Stelio
m Stephan
m Stephen
m Subagio-Rasidi
m Sven-Erik
m Tamim
m Theodor
m Thomas
m Thomas-Michael
m Thorsten
m Tillmann
m Tobias
m Tomislav
m Tommaso
m Tonny
m Ümmet
m Urban
m Valentin
m Valerian
m Viktor
m Viorel
m Vjekoslav
m Vladimir
m Volker
m Waldemar
m Walter
m Werner
m Wieland
m Wigbert
m Willibald
m Wilfried
m Wilfrid
m Wilhelm
m Wolf-Dieter
m Wolfred
m Winfried
m Wolfgang
m Wolfram
m Yannic
m Zeljko
m Zlatko
m Zvonko
w Adelheid
w Alexandra
w Andrea
w Angela
w Angelika
w Anita
w Anna
w Anna-Maria
w Anneliese
w Annemarie
w Astrid
w Auguste
w Barbara
w Beatrice
w Beatrix
w Bernadette
w Bernardine
w Bettina
w Bianca
w Birgit
w Brigitte
w Britta
w Carmen
w Chiara
w Christa
w Christine
w Christl
w Cornelia
w Durdica
w Edeltraud
w Elfriede
w Elisabeth
w Elke
w Erdmuthe
w Ernestine
w Eva-Maria
w Eveline
w Evelyn
w Flora
w Franziska
w Frederike
w Frieda
w Friederike
w Gabriela
w Gabriella
w Gabriele
w Gerda
w Gerlinde
w Gertraude
w Gertrude
w Gisela
w Gudrun
w Gunhild
w Gustav
w Hannelore
w Heidelinde
w Heidi
w Heidrun
w Helga
w Hemma
w Hermine
w Herta
w Hildegard
w Ingeborg
w Ingeburg
w Ingrid
w Isabel
w Isabella
w Isolde
w Janet
w Jasmin
w Jemilla-Katalin
w Jessica
w Johanna
w Josefine
w Julia
w Juliana
w Jutta
w Karin
w Karolina
w Karoline
w Katharina
w Kathrin
w Katja
w Katrin
w Kerstin
w Klaudia
w Laila
w Larissa
w Leonie
w Lieselotte
w Ligia
w Lisbeth
w Lygia
w Luisa
w Luiza
w Magdalena
w Manfreda
w Manuela
w Margareta
w Margarethe
w Margot
w Margret
w Marianne
w Marie-Luise
w Marina
w Marion
w Marlene
w Martha
w Martina
w Mathilde
w Mechthild
w Melanie
w Michaela
w Nadine
w Natasa
w Natascha
w Nicole
w Nikolitsa
w Nina
w Noriko
w Olivia
w Patrizia
w Paulina
w Pauline
w Phaedra
w Regina
w Reinhilde
w Renate
w Renee
w Rosina
w Roswitha
w Sabine
w Sandra
w Senada
w Ricarda
w Sieglinde
w Silvia
w Simone
w Solveig
w Sonja
w Sophia
w Sophie
w Stefanie
w Steffi
w Stephanie
w Susanne
w Sybille
w Tadeja
w Tamara
w Tanja
w Tatjana
w Theresia
w Ulrike
w Ursula
w Valerie
w Valery
w Veronika
w Victoria
w Waldtraud
w Waltraud
w Yvonne

7
afu/.typo_callbook Normal file
View File

@ -0,0 +1,7 @@
# TYPO CALLBOOK - 2024-06-17 Version 1.0.0
# X ... Nachname mit Vorname(n) vertauscht
# F ... Vorname(n) <firstname> falsch geschrieben
# You have to write the values exactly under the titles Nachname, Vorname
* Rufz Nachname Vorname
F OE1CGC Gasser Christoph
X OE5ENN Kolmhofer Erich

View File

@ -228,4 +228,4 @@ Clone the repository `script` to your site with:
$ yay -S python-mysql-connector $ yay -S python-mysql-connector
Hint: At the moment the compilation fails. Will be updated soon. Hint: At the moment the compilation fails. Will be updated soon.
See https://jira.mariadb.org/projects/CONPY/issues/CONPY-284 See https://jira.mariadb.org/projects/CONPY/issues/CONPY-284 (2024-06-16)

View File

@ -33,13 +33,16 @@ def call_parser():
parser.add_argument('-V', '--version', action='version', version='{} {}'.format(os.path.split(__file__)[1],__version__)) parser.add_argument('-V', '--version', action='version', version='{} {}'.format(os.path.split(__file__)[1],__version__))
parser.add_argument('-v', '--verbose', action='append_const', const = 1) parser.add_argument('-v', '--verbose', action='append_const', const = 1)
parser.add_argument('-p', '--path', default='Rufzeichenliste_AT_Stand_010624.pdf', help= 'skip the download if the specified path to a PDF file exists') parser.add_argument('-p', '--path', default='Rufzeichenliste_AT_Stand_010624.pdf', help= 'skip the download if the specified path to a PDF file exists')
# parser.add_argument('-t', '--type', default='' , help='specify the output, supported types are [ CSV | JSON ]') # not implemented yet
parser.add_argument('-o', '--output', default='', help='specify the file where the data are written to, default stdout')
parser.add_argument('-m', '--mariadb', help='SQL interface to MariaDB (MySql) format "<IP-Address>:<Port> <User> <Passwd>" or defined in .config')
parser.add_argument('url', metavar='URL', nargs='?', default=__website__) parser.add_argument('url', metavar='URL', nargs='?', default=__website__)
opt = parser.parse_args() opt = parser.parse_args()
opt.verbose = 0 if opt.verbose is None else sum(opt.verbose) opt.verbose = 0 if opt.verbose is None else sum(opt.verbose)
return opt return opt
def call_website(url,verbose,path='',interactive=False): def call_website(url,verbose,path='',interactive=False,output='',mariadb=''):
if path: if path:
if os.path.exists(path): if os.path.exists(path):
@ -78,7 +81,7 @@ def call_website(url,verbose,path='',interactive=False):
if(interactive): if(interactive):
time.sleep(300) time.sleep(300)
else: else:
time.sleep(5) time.sleep(4)
driver.close() driver.close()
return os.path.basename(href) return os.path.basename(href)
@ -91,29 +94,31 @@ def remove_first_quote_if_odd(text, verbose = 0):
# Find and remove the first quote # Find and remove the first quote
for i, char in enumerate(text): for i, char in enumerate(text):
if char in ['"']: # ['"', "'"]: if char in ['"']: # ['"', "'"]:
if (verbose > 0): if (verbose > 1):
print(text) print(text)
text = text[:i] + text[i+1:] text = text[:i] + text[i+1:]
if (verbose > 0): if (verbose > 1):
print(text) print(text)
break break
return text return text
def is_clubstation(call):
    """Tell whether a callsign belongs to a club station.

    A callsign counts as a club station when the character at index 3
    (the first suffix letter of an ``OEnXXX`` call) is ``X``, or when the
    callsign is the special case ``OE5SIX``. Both tests ignore case.

    Args:
        call: Callsign string with more than three characters.

    Returns:
        True for a club station, otherwise False.
    """
    assert(len(call) > 3)
    suffix_is_x = call[3].upper() == 'X'
    # OE5SIX is a club station although its suffix does not start with X.
    return suffix_is_x or call.upper() == 'OE5SIX'
def replace_substring_with_line(path, search_substring, verbose=0): def replace_substring_with_line(path, search_substring, verbose=0):
try: try:
with open(path, 'r') as file: if not replace_substring_with_line.lines:
lines = file.readlines() with open(path, 'r') as file:
replace_substring_with_line.lines = file.readlines()
search_substring
for line in lines: for line in replace_substring_with_line.lines:
if search_substring[0:40].lower() in line.lower(): if search_substring[0:46].lower() in line.lower():
modified_line = line.strip() modified_line = line.strip()
# Replace the substring with the whole line # Replace the substring with the whole line
## line = line.lower().replace(search_substring.lower(), modified_line) ## line = line.lower().replace(search_substring.lower(), modified_line)
@ -127,6 +132,153 @@ def replace_substring_with_line(path, search_substring, verbose=0):
return search_substring return search_substring
replace_substring_with_line.lines = None
def gender_substring(path, search_substring, verbose=0):
    """Look up the gender marker stored for a first name.

    The gender file holds one entry per line: a marker, a blank and the
    first name, e.g. ``m Thomas`` or ``w Andrea``. Anything after the name
    on the same line is ignored. The name has to match an entry as a whole
    (case-insensitively); a partial hit such as "Andrea" inside "Andreas"
    is not accepted, so names missing from the file yield ``'x'``.

    Args:
        path: Location of the gender file. The file is read once; its lines
            are cached in ``gender_substring.lines`` and reused by every
            later call, whatever path that call passes.
        search_substring: First name to look up.
        verbose: Accepted for signature compatibility; not used.

    Returns:
        The first character of the first matching entry, or ``'x'`` when
        the name is unknown or the file could not be read.
    """
    wanted = search_substring.strip().lower()
    try:
        if gender_substring.lines is None:
            # The name list contains umlauts and accents, so do not depend
            # on the platform default encoding.
            with open(path, 'r', encoding='utf-8') as file:
                gender_substring.lines = file.readlines()
        for line in gender_substring.lines:
            fields = line.split()
            # fields[0] is the marker, fields[1] the first name.
            if len(fields) >= 2 and fields[1].lower() == wanted:
                return fields[0][0]
    except FileNotFoundError:
        print(f'The file {path} was not found.')
    except Exception as e:
        print(f'An error occurred: {e}')
    return 'x'  # not found, unknown gender


gender_substring.lines = None
def get_gender(firstnames, surname, call, verbose=0):
    """Guess the gender of an operator from the first of their first names.

    The lookup uses the file ``.gender`` located next to this script. When
    that file does not exist the gender stays unknown.

    Args:
        firstnames: All first names, separated by blanks; only the part
            before the first blank is looked up.
        surname: Surname, used only in the diagnostic message.
        call: Callsign, used only in the diagnostic message.
        verbose: With a value above 0 every failed lookup is counted in
            ``get_gender.cnt`` and reported on stdout.

    Returns:
        The marker returned by ``gender_substring`` or ``'x'`` for unknown.
    """
    genderfile = '.gender'
    script_dir = os.path.dirname(os.path.abspath(__file__))
    gpath = os.path.join(script_dir, genderfile)
    if not os.path.exists(gpath):
        return 'x'

    # Only the 1st first name is checked; that matters when a person has
    # more than one first name.
    firstname = firstnames.split(' ', 1)[0]
    gender = gender_substring(gpath, firstname, verbose=0)
    if gender == 'x' and verbose > 0:
        get_gender.cnt += 1
        print(f'({get_gender.cnt}){call} "{firstname}" [{firstnames} {surname}] not found in file {genderfile} - gender "x" is set.')
    return gender


get_gender.cnt = 0
def call_split_name(fullname, call, verbose):
    """Split a "surname first names" string and guess the gender.

    The text before the first blank is the surname and the remainder the
    first name(s). Surnames that start with a particle are kept together:
    ``de``, ``el``, ``van``, ``von`` and ``della`` absorb the following
    word, and ``van der``, ``von der`` and ``van den`` absorb one more
    (e.g. "van der Meulen"). The particle is stored in lower case.

    Args:
        fullname: Name as printed in the callbook, surname first.
        call: Callsign; passed on to ``get_gender`` unchanged.
        verbose: With a value above 0 every particle surname is echoed; the
            value is also passed on to ``get_gender``.

    Returns:
        Tuple ``(firstname, surname, gender)``; ``firstname`` is
        ``'<unknown>'`` when nothing follows the surname.
    """
    assert(len(fullname) > 1)
    parts = fullname.split(' ', 1)
    surname = parts[0]
    particle = surname.lower()

    if particle in ('de', 'el', 'van', 'von', 'della'):
        # Skip the particle and its blank, then take the next word as well.
        parts = fullname[len(particle) + 1:].split(' ', 1)
        surname = particle + ' ' + parts[0]
        two_word_particles = ('van der', 'von der', 'van den')
        if particle in ('van', 'von') and surname.lower() in two_word_particles:
            # All two-word particles are seven characters plus one blank.
            parts = fullname[8:].split(' ', 1)
            surname = surname.lower() + ' ' + parts[0]
        if verbose > 0:
            print(f'## {fullname} --> {surname} ##')

    firstname = parts[1] if len(parts) > 1 else '<unknown>'
    # The callsign suffix is not used as a gender hint; the gender is
    # derived from the first name only.
    gender = get_gender(firstname, surname, call, verbose)
    return firstname, surname, gender
def fix_typo(call, fullname, verbose=1):
    """Correct a known-bad callbook name using the file ``.typo_callbook``.

    The correction file sits next to this script and is read once (cached
    in ``fix_typo.lines``). Its first four lines are skipped. The first
    following line that starts with ``*`` is the title line; the positions
    of its three titles define the call, surname and first-name columns
    (cached in ``fix_typo.spaces``). Every later line whose call column
    equals *call* is applied according to its first character:

    * ``#`` - comment, echoed when ``verbose > 1``.
    * ``F`` - surname or first name is misspelled. The listed name replaces
      *fullname* only when exactly one of the two parts differs.
    * ``X`` - surname and first name are swapped. The listed name replaces
      *fullname* only when it is the exact swap of the current name.

    If the callbook already agrees with the listed name, or differs from it
    in an unexpected way, a message is printed and the name is left alone.
    Read or parse errors are reported on stdout and otherwise ignored.

    Args:
        call: Callsign of the record.
        fullname: Name from the callbook, surname first.
        verbose: Verbosity; values above 0 echo the names involved.

    Returns:
        The corrected full name, or *fullname* unchanged.
    """
    fixtypofile = '.typo_callbook'
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), fixtypofile)
    if not os.path.exists(path):
        return fullname
    try:
        if fix_typo.lines is None:
            # Names may contain umlauts; do not rely on the locale encoding.
            with open(path, 'r', encoding='utf-8') as file:
                fix_typo.lines = file.readlines()
        for line in fix_typo.lines[4:]:  # the first four lines are skipped
            if not fix_typo.spaces:
                # Columns are not known until the title line was seen.
                if line.startswith('*'):
                    words = line.split()
                    if len(words) != 4:  # i.e. '*, call, nachname, vorname'
                        raise ValueError(
                            f'title line of {fixtypofile} must have 4 words, found {len(words)}')
                    fix_typo.spaces = [line.index(word) for word in words[1:]]
                continue
            # Compare the whole call: a substring test would let OE1CG pick
            # up the entry written for OE1CGC.
            if line[2:8].strip() != call:
                continue
            print(f'Call: {call} found')
            kind = line[:1]
            if kind == '#':
                if verbose > 1:
                    print(line.rstrip())
                continue
            if kind not in ('F', 'X'):
                continue
            if verbose > 0:
                print(fullname)
                print(line.rstrip())
            surname_col, firstname_col = fix_typo.spaces[1], fix_typo.spaces[2]
            firstname1, surname1, _ = call_split_name(fullname, call, 0)
            # rstrip() instead of dropping the last character, so the final
            # line of the file also works without a trailing newline.
            fullname2 = (line[surname_col:firstname_col - 1].rstrip()
                         + ' ' + line[firstname_col:].rstrip())
            if verbose > 0:
                print(fullname2)
            firstname2, surname2, _ = call_split_name(fullname2, call, 0)
            if kind == 'F':
                # Hardening: at a minimum, either the first names or the
                # surnames must fit.
                fix_cnt = (firstname1 != firstname2) + (surname1 != surname2)
                already_fixed = fix_cnt == 0
                suspicious = fix_cnt > 1
            else:  # 'X': exchange the surname with the first name
                already_fixed = (firstname1 == firstname2) and (surname1 == surname2)
                suspicious = (firstname1 != surname2) or (surname1 != firstname2)
            if already_fixed:
                print(f'It is fixed! You can remove the line with the item {call} from the file {fixtypofile}!')
            elif suspicious:
                print(f'Something went wrong, there are several bugs. Check line with call {call} in file {fixtypofile}!')
            else:
                fullname = fullname2
    except FileNotFoundError:
        print(f'The file {path} was not found.')
    except Exception as e:
        print(f'An error occurred: {e}')
    return fullname


fix_typo.lines = None
fix_typo.spaces = []
def call_data_record(line, mod_date, verbose): def call_data_record(line, mod_date, verbose):
@ -153,31 +305,33 @@ def call_data_record(line, mod_date, verbose):
match = re.search(r'^(OE[0-9][A-Z]{1,3})', call) match = re.search(r'^(OE[0-9][A-Z]{1,3})', call)
assert(match.string == call) assert(match.string == call)
fullname = records[1] fullname = records[1]
location = records[2]
address = records[3]
permit_class = records[4]
fullname = fix_typo(call, fullname, verbose)
# If there is a clubstation # If there is a clubstation
if is_clubstation(call): if is_clubstation(call):
# Name starting with only one quotation marks e.g. " -- remove that one: # Name starting with only one quotation marks e.g. " -- remove that one:
fullname = remove_first_quote_if_odd(fullname, verbose) # only found @ clubstations fullname = remove_first_quote_if_odd(fullname, verbose) # only found @ clubstations
clubstationfile = '.callbook_club' clubstationfile = '.callbook_club'
if verbose > 0: if verbose > 1:
print(f'Call: {call}, Name: {fullname}') print(f'Call: {call}, Name: {fullname}')
path = os.path.join(os.path.dirname(os.path.abspath(__file__)), clubstationfile) path = os.path.join(os.path.dirname(os.path.abspath(__file__)), clubstationfile)
if os.path.exists(path): if os.path.exists(path):
fullname = replace_substring_with_line(path, fullname, verbose) fullname = replace_substring_with_line(path, fullname, verbose)
if verbose > 0: gender = '*'
print(f'Call: {call}, Name: {fullname}') elif fullname[0] == '*':
gender = '*'
else: # Try to split the YL or OMs Name, guess the gender
firstname, surname, gender = call_split_name(fullname, call, verbose)
if verbose > 1:
if gender == '*':
print(f'Call: {call}, Name: {fullname}, Gender: {gender}')
else:
print(f'Call: {call}, First Name: {firstname}, Surname: {surname}, Gender: {gender}')
print(f'Location: {location}, Address: {address}, Permit: {permit_class}')
# if not record:
# return
# if verbose == 1:
# print(record.group(1))
# if verbose >= 3:
# print(f'Call: {record.group(1)}')
# print(f'Name: {record.group(2)}')
# #print(f'Location: {record[3]}')
# #print(f'Address: {record[4]}')
# #print(f'Permit Class: {record[5]}')
def call_analyse_pdf(file, verbose): def call_analyse_pdf(file, verbose):
@ -196,13 +350,13 @@ def call_analyse_pdf(file, verbose):
print(f' Title: {meta.title}') print(f' Title: {meta.title}')
print(f' Created: {meta.creation_date}') print(f' Created: {meta.creation_date}')
print(f'Modified: {meta.modification_date}') print(f'Modified: {meta.modification_date}')
for page in reader.pages: for page in reader.pages:
page_text = page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False) page_text = page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False)
lines = page_text.strip().splitlines() lines = page_text.strip().splitlines()
for line in lines[3:-2]: for line in lines[3:-2]:
line = line.strip() line = line.strip()
# calls = re.findall(r' +(OE[0-9][A-Z]{1,3}).*$', page_text)
if verbose >= 2: if verbose >= 2:
print(line) print(line)
call_data_record(line, meta.modification_date,verbose) call_data_record(line, meta.modification_date,verbose)
@ -210,11 +364,11 @@ def call_analyse_pdf(file, verbose):
if __name__ == '__main__': if __name__ == '__main__':
# call_description() # call_description()
args = call_parser() args = call_parser()
# filename = 'Rufzeichenliste_AT_Stand_010624.pdf'
try: try:
filename = call_website(**vars(args)) filename = call_website(**vars(args))
if args.verbose > 1:
print(f'Filename: {filename}') print(f'Filename: {filename}')
call_analyse_pdf(filename,args.verbose) call_analyse_pdf(filename,args.verbose)
sys.exit(0) sys.exit(0)
except Exception as e: except Exception as e: