ADD typo fixing file

2024-06-17 19:16:28 +02:00
Commit 5814cb1cfe
@@ -2,13 +2,17 @@ Landesverband Wien im Österreichischen Versuchssenderverband
 Amateurfunkverein des Österreichischen Bundesheeres - Austrian Military Radio Society
 Radio-Amateur-Klub der Technischen Universität Wien - Radio Amateur Club of the TU Wien
 Landesverband Tirol des Österreichischen Versuchssenderverbands
-Österreichisches Rotes Kreuz, Landesverband Vorarlberg
-Kulturverein der österreichischen Eisenbahner - Sektion Amateurfunk
+Kulturverein der österreichischen Eisenbahner, Sektion Amateurfunk
 Höhere Technische Bundeslehr- und Versuchsanstalt Innsbruck Anichstraße
 Österreichisches Rotes Kreuz, Landesverband Tirol
+Österreichisches Rotes Kreuz, Landesverband Vorarlberg
+Österreichisches Rotes Kreuz, Landesverband Steiermark
 Johanniter Tirol Rettungs- und Einsatzdienste mildtätige GmbH
 Österreichischer Versuchssenderverband - Dachverband
 Landesverband Niederösterreich des Österreichischen Versuchssenderverbands
 Amateurfunkverband Salzburg - Landesverband des Österreichischen Versuchssenderverbandes
 "OAFV" des ÖVSV, Ortsgruppe Ried - Grieskirchen
 OÖ Amateurfunkverband, Ortsgruppe Ried-Grieskirchen
+OE3XHT - Amateurfunkverein an der HTL St. Pölten
+Stadtgemeinde Feldkirch  Risiko- und Katastrophen-Schutz
+IPA,LANDESGRUPPE VBG., z.Hd.Herrn Longhi Harald OE9HLH
@@ -0,0 +1,528 @@
+m Abdul
+m Abdull
+m Achaz
+m Adalbert
+m Adalberto
+m Alastair
+m Albin
+m Albrecht
+m Aleksandar
+m Aleksander
+m Aleksey
+m Alexander
+m Alfons
+m Alfger
+m Aljoscha
+m Aljosha
+m Alois
+m Alvaro
+m András
+m Andràs
+m Andreas
+m Andrej
+m Ansgar
+m Anton
+m Archibald
+m Arkadiusz
+m Armin
+m Arno
+m Arnold
+m Arnulf
+m Arpad
+m Arthur
+m Artur
+m Attila
+m Augustinus
+m Avdija
+m Baldur
+m Bartlmä
+m Benedict
+m Benedikt
+m Benjamin
+m Bernard
+m Bernhard
+m Berthold
+m Bertram
+m Bernd
+m Bevan
+m Bogoljub
+m Boguslaw
+m Bohumil
+m Branko
+m Brenden
+m Bruno
+m Burghard
+m Burkhard
+m Burkhart
+m Camillo
+m Carl
+m Carlo
+m Carlos
+m Cezar-Iuliu
+m Charles
+m Christian
+m Christian-Andrei
+m Christian-Erich
+m Christof
+m Christoph
+m Christopher
+m Chungil
+m Claudio
+m Claus
+m Clemens
+m Cornelius
+m Craig
+m Cyrill
+m Daniel
+m David
+m Denis
+m Detlef
+m Dietmar
+m Domenik
+m Dominic
+m Dominik
+m Dominique
+m Dragan
+m Guenter
+m Eckart
+m Eduard
+m Edward
+m Egidius
+m Emmerich
+m Engelbert
+m Enrico
+m Ernest
+m Ernst
+m Erwin
+m Eugen
+m Fabian
+m Fabio
+m Felix
+m Ferdinand
+m Filip-Jan
+m Florian
+m Franc
+m Francesco
+m Frank
+m Franz
+m Franz-Joseph
+m Frederick
+m Frederikus
+m Fredy
+m Friedmann
+m Friedrich
+m Fridolin
+m Fritz
+m Frohwald
+m Gabriel
+m Gavril
+m Gebhard
+m Georg
+m Gerfried
+m Gerhard
+m Gernod
+m Gernot
+m Gerold
+m Giulio
+m Gisbert
+m Gottfried
+m Gottlieb
+m Gregor
+m Gregor-Emanuel
+m Guenther
+m Guido
+m Gunnar
+m Günther
+m Guntram
+m Gustav
+m Gustav-Josef
+m Hanno
+m Hanns
+m Hanns-Michael
+m Hans
+m Hans-Christian
+m Hans-Christoph
+m Hans-Ewald
+m Hans-Joachim
+m Hans-Jörg
+m Hans-Jürgen
+m Hans-Otto
+m Hans-Peter
+m Hans-Werner
+m Hansjoerg
+m Hansjörg
+m Harald
+m Harald-Thomas
+m Harry
+m Hartmut
+m Hartwig
+m Heimo
+m Heiner-Anton
+m Heinrich
+m Heinz
+m Hellmut
+m Hellmuth
+m Helmut
+m Helmuth
+m Henning
+m Henryk
+m Heribert
+m Hermann
+m Herolind
+m Herwig
+m Holger
+m Horst
+m Hubert
+m Ingo
+m Ingulf
+m Ivaylo
+m Izudin
+m Jacob
+m Jakob
+m Jan
+m Jann-Steffen
+m Jochen
+m Joerg
+m Jonas
+m Jonathan
+m Josef
+m Josef-Manfred
+m Joseph-Maria
+m Jozef
+m Josip
+m Johann
+m Johannes
+m Julian
+m Julius
+m Junichi
+m Jürgen
+m Karl-Heinz
+m Karl-Otto
+m Karl-Thomas
+m Karlheinz
+m Katarina
+m Kevin
+m Klaus
+m Klaus-Dieter
+m Klaus-Jürgen
+m Klemens
+m Konrad
+m Konstantin
+m Krzysztof
+m Laurence OE6LUN
+m Laurenz
+m Laurin
+m Laszlo
+m Leonel
+m Leonhard
+m Leopold
+m Liam
+m Lothar
+m Lucas
+m Luciano
+m Ludwig
+m Lukas
+m Manfred
+m Manuel
+m Marcel
+m Marcin
+m Marco
+m Marcus
+m Mario
+m Mario-Rafael
+m Marius
+m Marko
+m Markus
+m Martin
+m Marzell
+m Matija
+m Matteo
+m Matteo-Alessandro
+m Matthäus
+m Matthew
+m Matthias
+m Maxim
+m Maximilian
+m Meinrad
+m Meletios
+m Michael
+m Mihaly
+m Mikhail
+m Mirian
+m Mirijan
+m Miroslav
+m Monty
+m Muhammed
+m Murat
+m Nanak
+m Nicolai
+m Nicolas
+m Niels-Henrik
+m Niklas
+m Nikolas
+m Mikolaus
+m Noah
+m Norbert
+m Ole-Christian
+m Oliver
+m Oliver-Helmut
+m Ortwin
+m Othmar
+m Ovidiu
+m Ovidiu-Dan
+m Patrick
+m Patrik
+m Paul
+m Paulino
+m Peter
+m Peter-Ernst
+m Peter-Holger
+m Peter-Philipp
+m Petros
+m Philemon
+m Philipp
+m Piotr
+m Primoz
+m Radovan
+m Raimund
+m Rainer
+m Raffael
+m Raffi
+m Raoul
+m Raphael
+m Ralph
+m Reinald
+m Reinhard
+m Reinhart
+m Rembert
+m Riccardo
+m Richard
+m Reinhold
+m René
+m René-Lysander
+m Roland
+m Rolf-Dietrich
+m Romain
+m Roman
+m Ronald
+m Rüdiger
+m Rudolf
+m Rupert
+m Samuel
+m Sándor
+m Sandro
+m Santiago
+m Sebastian
+m Seong
+m Severin
+m Siegfried
+m Siegmar
+m Simon
+m Stefan
+m Stefano
+m Steffen
+m Stelian-Gabriel
+m Stelio
+m Stephan
+m Stephen
+m Subagio-Rasidi
+m Sven-Erik
+m Tamim
+m Theodor
+m Thomas
+m Thomas-Michael
+m Thorsten
+m Tillmann
+m Tobias
+m Tomislav
+m Tommaso
+m Tonny
+m Ümmet
+m Urban
+m Valentin
+m Valerian
+m Viktor
+m Viorel
+m Vjekoslav
+m Vladimir
+m Volker
+m Waldemar
+m Walter
+m Werner
+m Wieland
+m Wigbert
+m Willibald
+m Wilfried
+m Wilfrid
+m Wilhelm
+m Wolf-Dieter
+m Wolfred
+m Winfried
+m Wolfgang
+m Wolfram
+m Yannic
+m Zeljko
+m Zlatko
+m Zvonko
+w Adelheid
+w Alexandra
+w Andrea
+w Angela
+w Angelika
+w Anita
+w Anna
+w Anna-Maria
+w Anneliese
+w Annemarie
+w Astrid
+w Auguste
+w Barbara
+w Beatrice
+w Beatrix
+w Bernadette
+w Bernardine
+w Bettina
+w Bianca
+w Birgit
+w Brigitte
+w Britta
+w Carmen
+w Chiara
+w Christa
+w Christine
+w Christl
+w Cornelia
+w Durdica
+w Edeltraud
+w Elfriede
+w Elisabeth
+w Elke
+w Erdmuthe
+w Ernestine
+w Eva-Maria
+w Eveline
+w Evelyn
+w Flora
+w Franziska
+w Frederike
+w Frieda
+w Friederike
+w Gabriela
+w Gabriella
+w Gabriele
+w Gerda
+w Gerlinde
+w Gertraude
+w Gertrude
+w Gisela
+w Gudrun
+w Gunhild
+w Gustav
+w Hannelore
+w Heidelinde
+w Heidi
+w Heidrun
+w Helga
+w Hemma
+w Hermine
+w Herta
+w Hildegard
+w Ingeborg
+w Ingeburg
+w Ingrid
+w Isabel
+w Isabella
+w Isolde
+w Janet
+w Jasmin
+w Jemilla-Katalin
+w Jessica
+w Johanna
+w Josefine
+w Julia
+w Juliana
+w Jutta
+w Karin
+w Karolina
+w Karoline
+w Katharina
+w Kathrin
+w Katja
+w Katrin
+w Kerstin
+w Klaudia
+w Laila
+w Larissa
+w Leonie
+w Lieselotte
+w Ligia
+w Lisbeth
+w Lygia
+w Luisa
+w Luiza
+w Magdalena
+w Manfreda
+w Manuela
+w Margareta
+w Margarethe
+w Margot
+w Margret
+w Marianne
+w Marie-Luise
+w Marina
+w Marion
+w Marlene
+w Martha
+w Martina
+w Mathilde
+w Mechthild
+w Melanie
+w Michaela
+w Nadine
+w Natasa
+w Natascha
+w Nicole
+w Nikolitsa
+w Nina
+w Noriko
+w Olivia
+w Patrizia
+w Paulina
+w Pauline
+w Phaedra
+w Regina
+w Reinhilde
+w Renate
+w Renee
+w Rosina
+w Roswitha
+w Sabine
+w Sandra
+w Senada
+w Ricarda
+w Sieglinde
+w Silvia
+w Simone
+w Solveig
+w Sonja
+w Sophia
+w Sophie
+w Stefanie
+w Steffi
+w Stephanie
+w Susanne
+w Sybille
+w Tadeja
+w Tamara
+w Tanja
+w Tatjana
+w Theresia
+w Ulrike
+w Ursula
+w Valerie
+w Valery
+w Veronika
+w Victoria
+w Waldtraud
+w Waltraud
+w Yvonne
@@ -0,0 +1,7 @@
+# TYPO CALLBOOK - 2024-06-17 Version 1.0.0
+# X ... Nachname mit Vorname(n) vertauscht
+# F ... Vorname(n) <firstname> falsch geschrieben
+# The values must start in exactly the same columns as the titles Rufz, Nachname and Vorname
+* Rufz   Nachname    Vorname
+F OE1CGC Gasser      Christoph
+X OE5ENN Kolmhofer   Erich
@@ -228,4 +228,4 @@ Clone the repository `script` to your site with:
 	$ yay -S python-mysql-connector

 Hint: At the moment the compilation fails. Will be updated soon.
- See https://jira.mariadb.org/projects/CONPY/issues/CONPY-284
+ See https://jira.mariadb.org/projects/CONPY/issues/CONPY-284 (2024-06-16)
@@ -33,13 +33,16 @@ def call_parser():
 	parser.add_argument('-V', '--version', action='version', version='{} {}'.format(os.path.split(__file__)[1],__version__))
 	parser.add_argument('-v', '--verbose', action='append_const', const = 1)
 	parser.add_argument('-p', '--path', default='Rufzeichenliste_AT_Stand_010624.pdf', help= 'skip the download if the specified path to a PDF file exists')
+	# parser.add_argument('-t', '--type', default='' , help='specify the output, supported types are [ CSV | JSON ]') # not implemented yet
+	parser.add_argument('-o', '--output', default='', help='specify the file where the data are written to, default stdout')
+	parser.add_argument('-m', '--mariadb', help='SQL interface to MariaDB (MySql) format "<IP-Address>:<Port> <User> <Passwd>" or defined in .config')
 	parser.add_argument('url', metavar='URL', nargs='?', default=__website__)

 	opt = parser.parse_args()
 	opt.verbose = 0 if opt.verbose is None else sum(opt.verbose)
 	return opt

-def call_website(url,verbose,path='',interactive=False):
+def call_website(url,verbose,path='',interactive=False,output='',mariadb=''):

 	if path:
 		if os.path.exists(path):
@@ -78,7 +81,7 @@ def call_website(url,verbose,path='',interactive=False):
 	if(interactive):
 		time.sleep(300)
 	else:
-		time.sleep(5)
+		time.sleep(4)

 	driver.close()
 	return os.path.basename(href)
@@ -91,29 +94,31 @@ def remove_first_quote_if_odd(text, verbose = 0):
 		# Find and remove the first quote
 		for i, char in enumerate(text):
 				if char in ['"']: # ['"', "'"]:
-					if (verbose > 0):
+					if (verbose > 1):
 						print(text)
 					text = text[:i] + text[i+1:]
-					if (verbose > 0):
+					if (verbose > 1):
 						print(text)
 					break
 	return text

 def is_clubstation(call):
 	assert(len(call) > 3)
-	if call[3].upper() == 'X':
+	if call[3].upper() == 'X' or call.upper() == 'OE5SIX': # special case with OE5SIX (Clubstation)
 		return True
+	
 	return False

+
 def replace_substring_with_line(path, search_substring, verbose=0):
 	
 	try:
-		with open(path, 'r') as file:
-			lines = file.readlines()
-		
-		search_substring
-		for line in lines:
-			if search_substring[0:40].lower() in line.lower():
+		if not replace_substring_with_line.lines:
+			with open(path, 'r') as file:
+				replace_substring_with_line.lines = file.readlines()
+
+		for line in replace_substring_with_line.lines:
+			if search_substring[0:46].lower() in line.lower():
 				modified_line = line.strip()
 				# Replace the substring with the whole line
 				## line = line.lower().replace(search_substring.lower(), modified_line)
@@ -127,6 +132,153 @@ def replace_substring_with_line(path, search_substring, verbose=0):
 	
 	return search_substring

+replace_substring_with_line.lines = None
+
+def gender_substring(path, search_substring, verbose=0):  # Return the first character of the first cached line of `path` that matches, or 'x' if none does; `verbose` is accepted but not used here.
+	try:
+		if not gender_substring.lines:  # read the file only while the cache is still empty/None
+			with open(path, 'r') as file:
+				gender_substring.lines = file.readlines()
+
+		for line in gender_substring.lines:
+			if search_substring[2:].lower() in line.lower():  # NOTE(review): drops the first two characters of the *search string* and does a case-insensitive substring test, so short remainders can hit unrelated lines - presumably the 2-character marker prefix of `line` was meant to be skipped; confirm
+				return line[0]  # first character of the matching line (the marker column of the list file)
+	except FileNotFoundError:
+		print(f'The file {path} was not found.')
+	except Exception as e:
+		print(f'An error occurred: {e}')
+
+	return 'x' # no line matched (or reading failed): unknown gender
+
+gender_substring.lines = None  # line cache stored on the function object; filled on the first call
+
+def get_gender(firstnames, surname, call, verbose=0):  # Return the marker found by gender_substring() for the first given name, or 'x' when the '.gender' file is missing or has no match.
+
+	# the lookup file '.gender' is expected in the same directory as this script
+	genderfile = '.gender'
+	gender = 'x'
+	gpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), genderfile)
+	if os.path.exists(gpath):
+		firstname = firstnames.split(' ', 1)[0]
+		gender = gender_substring(gpath, firstname, verbose=0) # only the first given name is looked up, which matters when there are several
+		if gender == 'x':
+			if verbose > 0:
+				get_gender.cnt += 1  # running number of names that were not found, printed in front of the message
+				print(f'({get_gender.cnt}){call} "{firstname}" [{firstnames} {surname}] not found in file {genderfile} - gender "x" is set.')
+	return gender
+
+get_gender.cnt = 0  # counter stored on the function object; only incremented when verbose > 0
+
+def call_split_name(fullname, call, verbose):  # Split "<surname> <first name(s)>" and return the tuple (firstname, surname, gender).
+
+	assert(len(fullname) > 1)
+
+	name = fullname.split(' ', 1)  # [first word, remainder]; the first word is taken as the surname
+	surname = name[0]
+	# surnames that start with a particle, e.g. "de Lijezer", "van Dijk", "el Shamaa": the particle is lower-cased and joined with the next word
+	match surname.lower():
+		case 'de' | 'el':
+			name = fullname[3:].split(' ',1)  # skip the 2-letter particle plus the blank, then split again
+			surname = surname.lower() + ' ' + name[0]
+			if verbose > 0:
+				print(f'## {fullname} --> {surname} ##')
+
+		case 'van' | 'von' :
+			name = fullname[4:].split(' ',1)  # skip the 3-letter particle plus the blank
+			surname = surname.lower() + ' ' + name[0]
+			if surname.lower() in ['van der', 'von der', 'van den']:	# two-word particle, e.g. "van der Meulen", "Walther von der Vogelweide", "Annie van den Berg"
+				name = fullname[8:].split(' ',1)  # skip both particle words (7 characters) plus the blank
+				surname = surname.lower() + ' ' + name[0]
+			if verbose > 0:
+				print(f'## {fullname} --> {surname} ##')
+		case 'della' : # e.g. the old Italian noble family "della Rovere"
+			name = fullname[6:].split(' ',1)  # skip the 5-letter particle plus the blank
+			surname = surname.lower() + ' ' + name[0]
+			if verbose > 0:
+				print(f'## {fullname} --> {surname} ##')
+
+	if len(name) > 1:
+		firstname = name[1]  # everything after the surname, may contain several given names
+	else:
+		firstname = '<unknown>'  # placeholder when nothing follows the surname
+
+	# In Austria a call suffix starting with Y denotes a YL (young lady)
+#	if call[3].upper() == 'Y':
+	if False:  # the suffix rule above is disabled, so the else branch always runs
+		gender = 'f'
+	else:
+		gender = get_gender(firstname, surname, call, verbose)
+
+	return firstname, surname, gender
+
+def fix_typo(call, fullname, verbose=1):  # Return `fullname`, replaced by the entry for `call` from '.typo_callbook' (next to this script) when that entry passes the plausibility checks below.
+	fixtypofile = '.typo_callbook'
+	path = os.path.join(os.path.dirname(os.path.abspath(__file__)), fixtypofile)
+	if os.path.exists(path):
+		try:
+			if not fix_typo.lines:  # read the file only while the cache is still empty/None
+				with open(path, 'r') as file:
+					fix_typo.lines = file.readlines()
+
+			for line in fix_typo.lines[4:]: # the first four lines (index 0-3) are skipped
+				if len(fix_typo.spaces) == 0: # column positions not known yet
+					if line[0] == '*':  # title line that defines the columns
+						words = line.split()
+						assert len(words) == 4 # i.e. '*', call, Nachname (surname), Vorname (first name)
+						fix_typo.spaces = [line.index(words[1]), line.index(words[2]), line.index(words[3])]  # start column of each title
+				else:
+					if call in line[2:8]:  # NOTE(review): substring test on columns 2-7, so a shorter call contained in a listed one matches too - confirm this is acceptable
+						print(f'Call: {call} found')  # printed regardless of `verbose`
+						match line[0]:
+							case '#':
+								if verbose > 1:
+									print(line.rstrip())
+							case 'F':  # first name misspelled in the callbook
+								if verbose > 0:
+									print(fullname)
+									print(line.rstrip())
+								firstname1, surname1, gender1 = call_split_name(fullname, call, 0)
+								fullname2 = line[fix_typo.spaces[1]:fix_typo.spaces[2]-1].rstrip() + ' ' + line[fix_typo.spaces[2]:-1]  # "<Nachname> <Vorname>" cut from the fixed columns; [:-1] drops the last character (the newline, if the line has one)
+								if verbose > 0:
+									print(fullname2)
+								firstname2, surname2, gender2 = call_split_name(fullname2, call, 0)
+								# Hardening: the correction is only applied when exactly one of first name / surname differs
+								fix_cnt = 0
+								if (firstname1 != firstname2):
+									fix_cnt += 1
+								if (surname1 != surname2):
+									fix_cnt += 1
+								if fix_cnt == 0:
+									print(f'It is fixed! You can remove the line with the item {call} from the file {fixtypofile}!')
+								elif fix_cnt > 1:
+									print(f'Something went wrong, there are several bugs. Check line with call {call} in file {fixtypofile}!')
+								else:
+									fullname = fullname2
+							case 'X': # surname and first name are swapped in the callbook
+								if verbose > 0:
+									print(fullname)
+									print(line.rstrip())
+								firstname1, surname1, gender1 = call_split_name(fullname, call, 0)
+								fullname2 = line[fix_typo.spaces[1]:fix_typo.spaces[2]-1].rstrip() + ' ' + line[fix_typo.spaces[2]:-1]  # same column slicing as in the 'F' branch
+								if verbose > 0:
+									print(fullname2)
+								firstname2, surname2, gender2 = call_split_name(fullname2, call, 0)
+								fix_cnt = 0  # not read again in this branch
+								if (firstname1 ==  firstname2) and (surname1 == surname2):
+									print(f'It is fixed! You can remove the line with the item {call} from the file {fixtypofile}!')
+								elif (firstname1 !=  surname2) or (surname1 != firstname2):  # anything other than an exact swap is rejected
+									print(f'Something went wrong, there are several bugs. Check line with call {call} in file {fixtypofile}!')
+								else:
+									fullname = fullname2
+
+		except FileNotFoundError:
+			print(f'The file {path} was not found.')
+		except Exception as e:
+			print(f'An error occurred: {e}')
+
+	return fullname
+fix_typo.lines = None  # line cache stored on the function object; filled on the first call
+fix_typo.spaces = []  # start columns of the call, Nachname and Vorname titles; set when the '*' line is seen

 def call_data_record(line, mod_date, verbose):

@@ -153,31 +305,33 @@ def call_data_record(line, mod_date, verbose):
 	match = re.search(r'^(OE[0-9][A-Z]{1,3})', call)
 	assert(match.string == call)
 	fullname = records[1]
+	location = records[2]
+	address  = records[3]
+	permit_class = records[4]
+	fullname = fix_typo(call, fullname, verbose)
 	# If there is a clubstation
 	if is_clubstation(call):
 		# Name starting with only one quotation marks e.g. " -- remove that one:
 		fullname = remove_first_quote_if_odd(fullname, verbose) # only found @ clubstations
 		clubstationfile = '.callbook_club'
-		if verbose > 0:
+		if verbose > 1:
 			print(f'Call: {call}, Name: {fullname}')
 		path = os.path.join(os.path.dirname(os.path.abspath(__file__)), clubstationfile)
 		if os.path.exists(path):
 			fullname = replace_substring_with_line(path, fullname, verbose)
-		if verbose > 0:
-			print(f'Call: {call}, Name: {fullname}')
-			
+		gender = '*'
+	elif fullname[0] == '*':
+		gender = '*'
+	else: # Try to split the YL or OMs Name, guess the gender
+		firstname, surname, gender = call_split_name(fullname, call, verbose)
+	if verbose > 1:
+		if gender == '*':
+			print(f'Call: {call}, Name: {fullname}, Gender: {gender}')
+		else:
+			print(f'Call: {call}, First Name: {firstname}, Surname: {surname}, Gender: {gender}')

+		print(f'Location: {location}, Address: {address}, Permit: {permit_class}')

-#	if not record:
-#		return
-#	if verbose == 1:
-#		print(record.group(1))
-#	if verbose >= 3:
-#		print(f'Call: {record.group(1)}')
-#		print(f'Name: {record.group(2)}')
-#		#print(f'Location: {record[3]}')
-#		#print(f'Address: {record[4]}')
-#		#print(f'Permit Class: {record[5]}')

 def call_analyse_pdf(file, verbose):

@@ -196,13 +350,13 @@ def call_analyse_pdf(file, verbose):
 		print(f'   Title: {meta.title}')
 		print(f' Created: {meta.creation_date}')
 		print(f'Modified: {meta.modification_date}')
-		
+
 	for page in reader.pages:
 		page_text = page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False)
 		lines = page_text.strip().splitlines()
 		for line in lines[3:-2]:
 			line = line.strip()
-			# calls = re.findall(r' +(OE[0-9][A-Z]{1,3}).*$', page_text)
+
 			if verbose >= 2:
 				print(line)
 			call_data_record(line, meta.modification_date,verbose)
@@ -210,11 +364,11 @@ def call_analyse_pdf(file, verbose):
 if __name__ == '__main__':
 	# call_description()
 	args = call_parser()
-	# filename = 'Rufzeichenliste_AT_Stand_010624.pdf'
+
 	try:
 		filename = call_website(**vars(args))
-
-		print(f'Filename: {filename}')
+		if args.verbose > 1:
+			print(f'Filename: {filename}')
 		call_analyse_pdf(filename,args.verbose)
 		sys.exit(0)
 	except Exception as e: