ADD typo fixing file

This commit is contained in:
Thomas Kuschel 2024-06-17 19:16:28 +02:00
parent ad1a7c4507
commit 5814cb1cfe
5 changed files with 726 additions and 33 deletions

View File

@ -2,13 +2,17 @@ Landesverband Wien im Österreichischen Versuchssenderverband
Amateurfunkverein des Österreichischen Bundesheeres - Austrian Military Radio Society
Radio-Amateur-Klub der Technischen Universität Wien - Radio Amateur Club of the TU Wien
Landesverband Tirol des Österreichischen Versuchssenderverbands
Österreichisches Rotes Kreuz, Landesverband Vorarlberg
Kulturverein der österreichischen Eisenbahner - Sektion Amateurfunk
Kulturverein der österreichischen Eisenbahner, Sektion Amateurfunk
Höhere Technische Bundeslehr- und Versuchsanstalt Innsbruck Anichstraße
Österreichisches Rotes Kreuz, Landesverband Tirol
Österreichisches Rotes Kreuz, Landesverband Vorarlberg
Österreichisches Rotes Kreuz, Landesverband Steiermark
Johanniter Tirol Rettungs- und Einsatzdienste mildtätige GmbH
Österreichischer Versuchssenderverband - Dachverband
Landesverband Niederösterreich des Österreichischen Versuchssenderverbands
Amateurfunkverband Salzburg - Landesverband des Österreichischen Versuchssenderverbandes
"OAFV" des ÖVSV, Ortsgruppe Ried - Grieskirchen
OÖ Amateurfunkverband, Ortsgruppe Ried-Grieskirchen
OE3XHT - Amateurfunkverein an der HTL St. Pölten
Stadtgemeinde Feldkirch Risiko- und Katastrophen-Schutz
IPA,LANDESGRUPPE VBG., z.Hd.Herrn Longhi Harald OE9HLH

528
afu/.gender Normal file
View File

@ -0,0 +1,528 @@
m Abdul
m Abdull
m Achaz
m Adalbert
m Adalberto
m Alastair
m Albin
m Albrecht
m Aleksandar
m Aleksander
m Aleksey
m Alexander
m Alfons
m Alfger
m Aljoscha
m Aljosha
m Alois
m Alvaro
m András
m Andràs
m Andreas
m Andrej
m Ansgar
m Anton
m Archibald
m Arkadiusz
m Armin
m Arno
m Arnold
m Arnulf
m Arpad
m Arthur
m Artur
m Attila
m Augustinus
m Avdija
m Baldur
m Bartlmä
m Benedict
m Benedikt
m Benjamin
m Bernard
m Bernhard
m Berthold
m Bertram
m Bernd
m Bevan
m Bogoljub
m Boguslaw
m Bohumil
m Branko
m Brenden
m Bruno
m Burghard
m Burkhard
m Burkhart
m Camillo
m Carl
m Carlo
m Carlos
m Cezar-Iuliu
m Charles
m Christian
m Christian-Andrei
m Christian-Erich
m Christof
m Christoph
m Christopher
m Chungil
m Claudio
m Claus
m Clemens
m Cornelius
m Craig
m Cyrill
m Daniel
m David
m Denis
m Detlef
m Dietmar
m Domenik
m Dominic
m Dominik
m Dominique
m Dragan
m Guenter
m Eckart
m Eduard
m Edward
m Egidius
m Emmerich
m Engelbert
m Enrico
m Ernest
m Ernst
m Erwin
m Eugen
m Fabian
m Fabio
m Felix
m Ferdinand
m Filip-Jan
m Florian
m Franc
m Francesco
m Frank
m Franz
m Franz-Joseph
m Frederick
m Frederikus
m Fredy
m Friedmann
m Friedrich
m Fridolin
m Fritz
m Frohwald
m Gabriel
m Gavril
m Gebhard
m Georg
m Gerfried
m Gerhard
m Gernod
m Gernot
m Gerold
m Giulio
m Gisbert
m Gottfried
m Gottlieb
m Gregor
m Gregor-Emanuel
m Guenther
m Guido
m Gunnar
m Günther
m Guntram
m Gustav
m Gustav-Josef
m Hanno
m Hanns
m Hanns-Michael
m Hans
m Hans-Christian
m Hans-Christoph
m Hans-Ewald
m Hans-Joachim
m Hans-Jörg
m Hans-Jürgen
m Hans-Otto
m Hans-Peter
m Hans-Werner
m Hansjoerg
m Hansjörg
m Harald
m Harald-Thomas
m Harry
m Hartmut
m Hartwig
m Heimo
m Heiner-Anton
m Heinrich
m Heinz
m Hellmut
m Hellmuth
m Helmut
m Helmuth
m Henning
m Henryk
m Heribert
m Hermann
m Herolind
m Herwig
m Holger
m Horst
m Hubert
m Ingo
m Ingulf
m Ivaylo
m Izudin
m Jacob
m Jakob
m Jan
m Jann-Steffen
m Jochen
m Joerg
m Jonas
m Jonathan
m Josef
m Josef-Manfred
m Joseph-Maria
m Jozef
m Josip
m Johann
m Johannes
m Julian
m Julius
m Junichi
m Jürgen
m Karl-Heinz
m Karl-Otto
m Karl-Thomas
m Karlheinz
m Katarina
m Kevin
m Klaus
m Klaus-Dieter
m Klaus-Jürgen
m Klemens
m Konrad
m Konstantin
m Krzysztof
m Laurence OE6LUN
m Laurenz
m Laurin
m Laszlo
m Leonel
m Leonhard
m Leopold
m Liam
m Lothar
m Lucas
m Luciano
m Ludwig
m Lukas
m Manfred
m Manuel
m Marcel
m Marcin
m Marco
m Marcus
m Mario
m Mario-Rafael
m Marius
m Marko
m Markus
m Martin
m Marzell
m Matija
m Matteo
m Matteo-Alessandro
m Matthäus
m Matthew
m Matthias
m Maxim
m Maximilian
m Meinrad
m Meletios
m Michael
m Mihaly
m Mikhail
m Mirian
m Mirijan
m Miroslav
m Monty
m Muhammed
m Murat
m Nanak
m Nicolai
m Nicolas
m Niels-Henrik
m Niklas
m Nikolas
m Nikolaus
m Noah
m Norbert
m Ole-Christian
m Oliver
m Oliver-Helmut
m Ortwin
m Othmar
m Ovidiu
m Ovidiu-Dan
m Patrick
m Patrik
m Paul
m Paulino
m Peter
m Peter-Ernst
m Peter-Holger
m Peter-Philipp
m Petros
m Philemon
m Philipp
m Piotr
m Primoz
m Radovan
m Raimund
m Rainer
m Raffael
m Raffi
m Raoul
m Raphael
m Ralph
m Reinald
m Reinhard
m Reinhart
m Rembert
m Riccardo
m Richard
m Reinhold
m René
m René-Lysander
m Roland
m Rolf-Dietrich
m Romain
m Roman
m Ronald
m Rüdiger
m Rudolf
m Rupert
m Samuel
m Sándor
m Sandro
m Santiago
m Sebastian
m Seong
m Severin
m Siegfried
m Siegmar
m Simon
m Stefan
m Stefano
m Steffen
m Stelian-Gabriel
m Stelio
m Stephan
m Stephen
m Subagio-Rasidi
m Sven-Erik
m Tamim
m Theodor
m Thomas
m Thomas-Michael
m Thorsten
m Tillmann
m Tobias
m Tomislav
m Tommaso
m Tonny
m Ümmet
m Urban
m Valentin
m Valerian
m Viktor
m Viorel
m Vjekoslav
m Vladimir
m Volker
m Waldemar
m Walter
m Werner
m Wieland
m Wigbert
m Willibald
m Wilfried
m Wilfrid
m Wilhelm
m Wolf-Dieter
m Wolfred
m Winfried
m Wolfgang
m Wolfram
m Yannic
m Zeljko
m Zlatko
m Zvonko
w Adelheid
w Alexandra
w Andrea
w Angela
w Angelika
w Anita
w Anna
w Anna-Maria
w Anneliese
w Annemarie
w Astrid
w Auguste
w Barbara
w Beatrice
w Beatrix
w Bernadette
w Bernardine
w Bettina
w Bianca
w Birgit
w Brigitte
w Britta
w Carmen
w Chiara
w Christa
w Christine
w Christl
w Cornelia
w Durdica
w Edeltraud
w Elfriede
w Elisabeth
w Elke
w Erdmuthe
w Ernestine
w Eva-Maria
w Eveline
w Evelyn
w Flora
w Franziska
w Frederike
w Frieda
w Friederike
w Gabriela
w Gabriella
w Gabriele
w Gerda
w Gerlinde
w Gertraude
w Gertrude
w Gisela
w Gudrun
w Gunhild
w Gustav
w Hannelore
w Heidelinde
w Heidi
w Heidrun
w Helga
w Hemma
w Hermine
w Herta
w Hildegard
w Ingeborg
w Ingeburg
w Ingrid
w Isabel
w Isabella
w Isolde
w Janet
w Jasmin
w Jemilla-Katalin
w Jessica
w Johanna
w Josefine
w Julia
w Juliana
w Jutta
w Karin
w Karolina
w Karoline
w Katharina
w Kathrin
w Katja
w Katrin
w Kerstin
w Klaudia
w Laila
w Larissa
w Leonie
w Lieselotte
w Ligia
w Lisbeth
w Lygia
w Luisa
w Luiza
w Magdalena
w Manfreda
w Manuela
w Margareta
w Margarethe
w Margot
w Margret
w Marianne
w Marie-Luise
w Marina
w Marion
w Marlene
w Martha
w Martina
w Mathilde
w Mechthild
w Melanie
w Michaela
w Nadine
w Natasa
w Natascha
w Nicole
w Nikolitsa
w Nina
w Noriko
w Olivia
w Patrizia
w Paulina
w Pauline
w Phaedra
w Regina
w Reinhilde
w Renate
w Renee
w Rosina
w Roswitha
w Sabine
w Sandra
w Senada
w Ricarda
w Sieglinde
w Silvia
w Simone
w Solveig
w Sonja
w Sophia
w Sophie
w Stefanie
w Steffi
w Stephanie
w Susanne
w Sybille
w Tadeja
w Tamara
w Tanja
w Tatjana
w Theresia
w Ulrike
w Ursula
w Valerie
w Valery
w Veronika
w Victoria
w Waldtraud
w Waltraud
w Yvonne

7
afu/.typo_callbook Normal file
View File

@ -0,0 +1,7 @@
# TYPO CALLBOOK - 2024-06-17 Version 1.0.0
# X ... Nachname mit Vorname(n) vertauscht
# F ... Vorname(n) <firstname> falsch geschrieben
# You have to write the values exactly under the titles Nachname, Vorname
* Rufz Nachname Vorname
F OE1CGC Gasser Christoph
X OE5ENN Kolmhofer Erich

View File

@ -228,4 +228,4 @@ Clone the repository `script` to your site with:
$ yay -S python-mysql-connector
Hint: At the moment the compilation fails. Will be updated soon.
See https://jira.mariadb.org/projects/CONPY/issues/CONPY-284
See https://jira.mariadb.org/projects/CONPY/issues/CONPY-284 (2024-06-16)

View File

@ -33,13 +33,16 @@ def call_parser():
parser.add_argument('-V', '--version', action='version', version='{} {}'.format(os.path.split(__file__)[1],__version__))
parser.add_argument('-v', '--verbose', action='append_const', const = 1)
parser.add_argument('-p', '--path', default='Rufzeichenliste_AT_Stand_010624.pdf', help= 'skip the download if the specified path to a PDF file exists')
# parser.add_argument('-t', '--type', default='' , help='specify the output, supported types are [ CSV | JSON ]') # not implemented yet
parser.add_argument('-o', '--output', default='', help='specify the file where the data are written to, default stdout')
parser.add_argument('-m', '--mariadb', help='SQL interface to MariaDB (MySql) format "<IP-Address>:<Port> <User> <Passwd>" or defined in .config')
parser.add_argument('url', metavar='URL', nargs='?', default=__website__)
opt = parser.parse_args()
opt.verbose = 0 if opt.verbose is None else sum(opt.verbose)
return opt
def call_website(url,verbose,path='',interactive=False):
def call_website(url,verbose,path='',interactive=False,output='',mariadb=''):
if path:
if os.path.exists(path):
@ -78,7 +81,7 @@ def call_website(url,verbose,path='',interactive=False):
if(interactive):
time.sleep(300)
else:
time.sleep(5)
time.sleep(4)
driver.close()
return os.path.basename(href)
@ -91,29 +94,31 @@ def remove_first_quote_if_odd(text, verbose = 0):
# Find and remove the first quote
for i, char in enumerate(text):
if char in ['"']: # ['"', "'"]:
if (verbose > 0):
if (verbose > 1):
print(text)
text = text[:i] + text[i+1:]
if (verbose > 0):
if (verbose > 1):
print(text)
break
return text
def is_clubstation(call):
    """Return True if *call* is the call sign of a club station.

    A call sign is treated as a club station when its fourth character is
    ``X`` (compared case-insensitively) or when it is the special-case call
    ``OE5SIX``.

    Args:
        call: The call sign; must be longer than three characters.

    Returns:
        bool: True for a club station, False otherwise.
    """
    assert len(call) > 3
    # special case with OE5SIX (Clubstation)
    return call[3].upper() == 'X' or call.upper() == 'OE5SIX'
def replace_substring_with_line(path, search_substring, verbose=0):
try:
with open(path, 'r') as file:
lines = file.readlines()
if not replace_substring_with_line.lines:
with open(path, 'r') as file:
replace_substring_with_line.lines = file.readlines()
search_substring
for line in lines:
if search_substring[0:40].lower() in line.lower():
for line in replace_substring_with_line.lines:
if search_substring[0:46].lower() in line.lower():
modified_line = line.strip()
# Replace the substring with the whole line
## line = line.lower().replace(search_substring.lower(), modified_line)
@ -127,6 +132,153 @@ def replace_substring_with_line(path, search_substring, verbose=0):
return search_substring
replace_substring_with_line.lines = None
def gender_substring(path, search_substring, verbose=0):
    """Look up the gender marker stored for a first name in a gender file.

    Every line of the file is expected to look like ``<marker> <name>``,
    e.g. ``m Thomas`` or ``w Andrea``.  The file is read once and the lines
    are cached on the function object (``gender_substring.lines``), so the
    *path* of the first successful call is the one that stays in effect.

    Matching is case-insensitive and done in two passes:

    1. Exact match of *search_substring* against the name of an entry.  This
       keeps "Andrea" from being answered by the earlier entry "Andreas".
    2. Legacy fallback: the name without its first two characters is searched
       as a substring of each line.  This loose heuristic is kept only
       because the shipped name list relies on it (it has no plain "Robert",
       for example); it is skipped when nothing is left of the name, because
       an empty needle would match the very first line.

    Args:
        path: Path of the gender file.
        search_substring: The first name to look up.
        verbose: Unused; accepted for signature compatibility.

    Returns:
        str: The first character of the matching line (the marker), or
        ``'x'`` when the name is unknown or the file could not be read.
    """
    try:
        if not gender_substring.lines:
            # The list contains umlauts and accents, so do not depend on the
            # platform default encoding.
            with open(path, 'r', encoding='utf-8') as file:
                gender_substring.lines = file.readlines()

        wanted = search_substring.strip().lower()
        for line in gender_substring.lines:
            fields = line.split(None, 1)
            if len(fields) == 2 and fields[1].strip().lower() == wanted:
                return fields[0][0]

        # NOTE: legacy heuristic, may produce wrong guesses for unlisted names.
        loose = search_substring[2:].lower()
        if loose:
            for line in gender_substring.lines:
                if loose in line.lower():
                    return line[0]
    except FileNotFoundError:
        print(f'The file {path} was not found.')
    except Exception as e:
        print(f'An error occurred: {e}')
    return 'x'  # not found, unknown gender


gender_substring.lines = None
def get_gender(firstnames, surname, call, verbose=0):
    """Guess the gender for a person from the first of their first names.

    The lookup uses the file ``.gender`` located next to this script.  Only
    the first word of *firstnames* is looked up, which matters when a person
    has more than one first name.

    Args:
        firstnames: One or more first names separated by blanks.
        surname: The surname; only used in the diagnostic message.
        call: The call sign; only used in the diagnostic message.
        verbose: With a value above 0, every unknown name is reported and
            counted in ``get_gender.cnt``.

    Returns:
        str: The marker found by ``gender_substring``, or ``'x'`` when the
        name is unknown or the gender file does not exist.
    """
    genderfile = '.gender'
    script_dir = os.path.dirname(os.path.abspath(__file__))
    gpath = os.path.join(script_dir, genderfile)
    if not os.path.exists(gpath):
        return 'x'

    # only check 1st/firstname of name, important when there are more than 1 firstnames
    firstname = firstnames.split(' ', 1)[0]
    gender = gender_substring(gpath, firstname, verbose=0)
    if gender == 'x' and verbose > 0:
        get_gender.cnt += 1
        print(f'({get_gender.cnt}){call} "{firstname}" [{firstnames} {surname}] not found in file {genderfile} - gender "x" is set.')
    return gender


get_gender.cnt = 0
def call_split_name(fullname, call, verbose):
    """Split a "Surname Firstname(s)" string and guess the person's gender.

    The first word is the surname, everything after it the first name(s).
    Surnames that start with a particle are kept together and the particle is
    lower-cased, e.g. "de Lijezer", "van Dijk", "el Shamaa", "della Rovere",
    "van der Meulen", "von der Vogelweide", "van den Berg".

    Args:
        fullname: The name as listed, surname first; must be longer than one
            character.
        call: The call sign; only passed on to ``get_gender``.
        verbose: With a value above 0, a recognised particle surname is
            reported; the value is also passed on to ``get_gender``.

    Returns:
        tuple: ``(firstname, surname, gender)``.  ``firstname`` is
        ``'<unknown>'`` when nothing follows the surname.
    """
    assert len(fullname) > 1

    one_word_particles = ('de', 'el', 'van', 'von', 'della')
    two_word_particles = (('van', 'der'), ('von', 'der'), ('van', 'den'))

    # Work on words instead of fixed character offsets, so doubled or
    # trailing blanks cannot shift the surname/first-name boundary.
    words = fullname.split()
    lowered = [word.lower() for word in words[:2]]
    if tuple(lowered) in two_word_particles:
        particle_count = 2
    elif lowered and lowered[0] in one_word_particles:
        particle_count = 1
    else:
        particle_count = 0

    # The surname is the particle(s) plus the word that follows them.
    surname_end = min(particle_count + 1, len(words))
    surname = ' '.join(lowered[:particle_count] + words[particle_count:surname_end])
    if particle_count and verbose > 0:
        print(f'## {fullname} --> {surname} ##')

    firstname = ' '.join(words[surname_end:]) or '<unknown>'

    # NOTE: in Austria a call suffix starting with Y may denote a YL (young
    # lady); that shortcut was disabled in the original code and is therefore
    # not applied here, the gender always comes from the first-name lookup.
    gender = get_gender(firstname, surname, call, verbose)
    return firstname, surname, gender
def _typo_entry_fullname(line, spaces):
    """Build "Surname Firstname(s)" from one data line of the typo file.

    *spaces* holds the column offsets of the call, surname and first-name
    titles, taken from the ``*`` header line of the file.
    """
    surname = line[spaces[1]:spaces[2] - 1].rstrip()
    # rstrip() instead of cutting the last character: the final line of the
    # file may have no trailing newline, which used to cost the last letter.
    firstname = line[spaces[2]:].rstrip()
    return surname + ' ' + firstname


def fix_typo(call, fullname, verbose=1):
    """Correct a known callbook name error for *call*.

    Corrections are read from the file ``.typo_callbook`` next to this
    script.  Its ``*`` header line defines the columns (call, surname, first
    name); the values of the following lines must start exactly under those
    titles.  The first character of a line selects the action:

    * ``#`` - comment, echoed only with ``verbose > 1``
    * ``F`` - first name or surname is misspelled; the entry is applied only
      if exactly one of the two differs from the listed name
    * ``X`` - surname and first name are exchanged; the entry is applied only
      if the listed name is exactly the swapped entry

    The file content and the column offsets are cached on the function
    object (``fix_typo.lines``, ``fix_typo.spaces``).

    Args:
        call: The call sign to look up; it must equal the entry's call.
        fullname: The name as listed, surname first.
        verbose: Values above 0 print the names involved.

    Returns:
        str: The corrected name, or *fullname* unchanged when no entry
        applies, the file is missing or an error occurred.
    """
    fixtypofile = '.typo_callbook'
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), fixtypofile)
    if not os.path.exists(path):
        return fullname

    try:
        if not fix_typo.lines:
            with open(path, 'r', encoding='utf-8') as file:
                fix_typo.lines = file.readlines()

        # All lines are scanned; everything before the '*' header is ignored
        # anyway, so the number of leading comment lines does not matter.
        for line in fix_typo.lines:
            if not fix_typo.spaces:  # not initialized
                if line.startswith('*'):
                    words = line.split()
                    if len(words) != 4:  # i.e. '*, call, nachname, vorname'
                        raise ValueError(f'malformed header line in {fixtypofile}: {line.rstrip()}')
                    fix_typo.spaces = [line.index(words[1]), line.index(words[2]), line.index(words[3])]
                continue

            # Compare the whole call: a substring test would let OE1CG pick
            # up the entry written for OE1CGC.
            fields = line.split(None, 2)
            if len(fields) < 2 or fields[1] != call:
                continue
            if verbose > 0:
                print(f'Call: {call} found')

            action = line[0]
            if action == '#':
                if verbose > 1:
                    print(line.rstrip())
                continue
            if action not in ('F', 'X'):
                continue

            if verbose > 0:
                print(fullname)
                print(line.rstrip())
            firstname1, surname1, _ = call_split_name(fullname, call, 0)
            fullname2 = _typo_entry_fullname(line, fix_typo.spaces)
            if verbose > 0:
                print(fullname2)
            firstname2, surname2, _ = call_split_name(fullname2, call, 0)

            if action == 'F':
                # Hardening: at a minimum, either the first names or the
                # surnames must fit.
                differences = (firstname1 != firstname2) + (surname1 != surname2)
                already_fixed = differences == 0
                applicable = differences == 1
            else:  # 'X': exchange the surname with firstname
                already_fixed = firstname1 == firstname2 and surname1 == surname2
                applicable = firstname1 == surname2 and surname1 == firstname2

            if already_fixed:
                print(f'It is fixed! You can remove the line with the item {call} from the file {fixtypofile}!')
            elif not applicable:
                print(f'Something went wrong, there are several bugs. Check line with call {call} in file {fixtypofile}!')
            else:
                fullname = fullname2
    except FileNotFoundError:
        print(f'The file {path} was not found.')
    except Exception as e:
        print(f'An error occurred: {e}')
    return fullname


fix_typo.lines = None
fix_typo.spaces = []
def call_data_record(line, mod_date, verbose):
@ -153,32 +305,34 @@ def call_data_record(line, mod_date, verbose):
match = re.search(r'^(OE[0-9][A-Z]{1,3})', call)
assert(match.string == call)
fullname = records[1]
location = records[2]
address = records[3]
permit_class = records[4]
fullname = fix_typo(call, fullname, verbose)
# If there is a clubstation
if is_clubstation(call):
# Name starting with only one quotation marks e.g. " -- remove that one:
fullname = remove_first_quote_if_odd(fullname, verbose) # only found @ clubstations
clubstationfile = '.callbook_club'
if verbose > 0:
if verbose > 1:
print(f'Call: {call}, Name: {fullname}')
path = os.path.join(os.path.dirname(os.path.abspath(__file__)), clubstationfile)
if os.path.exists(path):
fullname = replace_substring_with_line(path, fullname, verbose)
if verbose > 0:
print(f'Call: {call}, Name: {fullname}')
gender = '*'
elif fullname[0] == '*':
gender = '*'
else: # Try to split the YL or OMs Name, guess the gender
firstname, surname, gender = call_split_name(fullname, call, verbose)
if verbose > 1:
if gender == '*':
print(f'Call: {call}, Name: {fullname}, Gender: {gender}')
else:
print(f'Call: {call}, First Name: {firstname}, Surname: {surname}, Gender: {gender}')
print(f'Location: {location}, Address: {address}, Permit: {permit_class}')
# if not record:
# return
# if verbose == 1:
# print(record.group(1))
# if verbose >= 3:
# print(f'Call: {record.group(1)}')
# print(f'Name: {record.group(2)}')
# #print(f'Location: {record[3]}')
# #print(f'Address: {record[4]}')
# #print(f'Permit Class: {record[5]}')
def call_analyse_pdf(file, verbose):
# Define a regular expression to match tables
@ -202,7 +356,7 @@ def call_analyse_pdf(file, verbose):
lines = page_text.strip().splitlines()
for line in lines[3:-2]:
line = line.strip()
# calls = re.findall(r' +(OE[0-9][A-Z]{1,3}).*$', page_text)
if verbose >= 2:
print(line)
call_data_record(line, meta.modification_date,verbose)
@ -210,11 +364,11 @@ def call_analyse_pdf(file, verbose):
if __name__ == '__main__':
# call_description()
args = call_parser()
# filename = 'Rufzeichenliste_AT_Stand_010624.pdf'
try:
filename = call_website(**vars(args))
print(f'Filename: {filename}')
if args.verbose > 1:
print(f'Filename: {filename}')
call_analyse_pdf(filename,args.verbose)
sys.exit(0)
except Exception as e: