scripts/afu/callbook.py

#!/usr/bin/env python3

import argparse
import os
import sys
import time
import pypdf
#from PyPDF2 import PdfReader
from pypdf import PdfReader
import re # regular expression
import pandas as pd

__version__ = '1.0.0'
__website__ = 'https://www.fb.gv.at/Funk/amateurfunkdienst.html'

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromiumService

def call_description():
	"""Print the one-line program banner, including the script version."""
	banner = f'Download and Parse the Austrian Callbook Version {__version__}'
	print(banner)

def call_parser():
	"""Build the command line interface and parse ``sys.argv``.

	Returns:
		The parsed ``argparse.Namespace``. Its ``verbose`` attribute is
		always an ``int``: the number of times ``-v`` was given, ``0`` if
		it was not given at all.
	"""
	script_name = os.path.basename(__file__)

	parser = argparse.ArgumentParser(
		description='Download and Parse the Austrian Callbook',
		epilog=f'''
		Written by Thomas Kuschel,
		Version {__version__}
		''',
	)
	parser.add_argument('-i', '--interactive', action='store_true', default=False)
	# A '-s/--server' option is not implemented; the server is given by the positional URL.
	parser.add_argument('-V', '--version', action='version', version=f'{script_name} {__version__}')
	# Each occurrence of -v raises the verbosity level by one.
	parser.add_argument('-v', '--verbose', action='count', default=0)
	parser.add_argument('-p', '--path', default='Rufzeichenliste_AT_Stand_010624.pdf', help= 'skip the download if the specified path to a PDF file exists')
	# A '-t/--type' option selecting CSV or JSON output is not implemented yet.
	parser.add_argument('-o', '--output', default='', help='specify the file where the data are written to, default stdout')
	parser.add_argument('-m', '--mariadb', help='SQL interface to MariaDB (MySql) format "<IP-Address>:<Port> <User> <Passwd>" or defined in .config')
	parser.add_argument('url', metavar='URL', nargs='?', default=__website__)

	return parser.parse_args()

def call_website(url,verbose,path='',interactive=False,output='',mariadb=''):
	"""Return the name of the callbook PDF, downloading it if necessary.

	If ``path`` is non-empty it must point to an existing file, which is
	returned as is; otherwise the program terminates with exit status 3.
	With an empty ``path`` a Chrome session is opened on ``url``, the first
	link whose text contains "Rufzeichen" is clicked and the base name of
	the link target is returned.

	Args:
		url: Web page that links to the callbook PDF.
		verbose: Verbosity level (accepted for ``**vars(args)``, unused here).
		path: Path of an already downloaded PDF, or '' to download.
		interactive: Open a visible browser window and wait 300 seconds
			instead of running headless and waiting 4 seconds.
		output: Accepted for ``**vars(args)``, unused here.
		mariadb: Accepted for ``**vars(args)``, unused here.

	Returns:
		``path`` if it exists, else the base name of the clicked link's href.

	Raises:
		SystemExit: Status 3 if ``path`` does not exist, status 2 if the page
			has no matching link.
		RuntimeError: If the matching link carries no href attribute.
	"""
	if path:
		if os.path.exists(path):
			return path
		print(f'The given path "{path}" does not exist.')
		sys.exit(3)

	if interactive:
		print('Interactive')
		driver = webdriver.Chrome()
	else:
		print('Headless Script')
		options = webdriver.ChromeOptions()
		options.add_argument('--headless')
		options.add_argument('--no-sandbox')
		options.add_argument('--disable-dev-shm-usage')
		driver = webdriver.Chrome(options=options)

	# try/finally guarantees that the browser session is shut down on every
	# exit path, including sys.exit() and Selenium exceptions.
	try:
		driver.get(url)
		print(driver.title)
		elements = driver.find_elements(By.PARTIAL_LINK_TEXT,"Rufzeichen")
		if not elements:
			print('Sorry, no Link containing "Rufzeichen" found.')
			sys.exit(2)

		element = elements[0] # take the first one
		# Read everything needed from the element before clicking: the click
		# may navigate away and invalidate the element reference.
		href = element.get_attribute('href')
		link_text = element.text
		if not href:
			raise RuntimeError(f'The link "{link_text}" has no href attribute.')

		element.click()
		print(link_text)
		# Give the browser time to finish (or the user time to interact).
		time.sleep(300 if interactive else 4)
	finally:
		# quit() ends the whole WebDriver session; close() would only close
		# the current window.
		driver.quit()

	return os.path.basename(href)

def remove_first_quote_if_odd(text, verbose = 0):
	"""Drop the first double quote of ``text`` when its quotes are unbalanced.

	Args:
		text: String to clean up.
		verbose: If greater than 1, print the string before and after the
			removal.

	Returns:
		``text`` unchanged if it holds an even number of double quotes,
		otherwise ``text`` without its first double quote.
	"""
	if text.count('"') % 2 == 0:
		return text

	cleaned = text.replace('"', '', 1)
	if verbose > 1:
		print(text)
		print(cleaned)
	return cleaned

def is_clubstation(call):
	"""Tell whether ``call`` is treated as a club station.

	A call counts as a club station when its fourth character is an 'X'
	(case-insensitive) or when it is the special case OE5SIX.
	"""
	assert(len(call) > 3)
	call_upper = call.upper()
	# call_upper[3] is the first suffix letter; OE5SIX is the one listed exception.
	return call_upper[3] == 'X' or call_upper == 'OE5SIX'


def replace_substring_with_line(path, search_substring, verbose=0):
	"""Expand ``search_substring`` to the first line of ``path`` containing it.

	Only the first 46 characters of ``search_substring`` are compared, and the
	comparison ignores case. The file content is cached on the function
	object after the first successful read.

	Args:
		path: Text file whose lines are searched.
		search_substring: Text to look for.
		verbose: Unused.

	Returns:
		The stripped matching line, or ``search_substring`` itself when no
		line matches or the file could not be read (an error is printed).
	"""
	try:
		cached = replace_substring_with_line.lines
		if not cached:
			with open(path, 'r') as handle:
				cached = handle.readlines()
			replace_substring_with_line.lines = cached

		hit = next(
			(row for row in cached if search_substring[0:46].lower() in row.lower()),
			None,
		)
		if hit is not None:
			return hit.strip()

	except FileNotFoundError:
		print(f'The file {path} was not found.')
	except Exception as e:
		print(f'An error occurred: {e}')

	return search_substring

# Lines of the lookup file, filled on first use.
replace_substring_with_line.lines = None

def gender_substring(path, search_substring, verbose=0):
	"""Look up a gender marker for ``search_substring`` in the file ``path``.

	The search string without its first two characters is compared
	case-insensitively against every line; the first character of the first
	line containing it is returned. The file content is cached on the
	function object after the first successful read.

	Args:
		path: Text file whose lines are searched.
		search_substring: Name to look for.
		verbose: Unused.

	Returns:
		The first character of the matching line, or 'x' when nothing matches
		or the file could not be read (an error is printed).
	"""
	# NOTE(review): the [2:] slice is applied to the searched name, not to the
	# line. A name of two characters or fewer therefore matches the first line
	# of the file. Confirm against the '.gender' file format whether
	# line[2:] was intended.
	try:
		cached = gender_substring.lines
		if not cached:
			with open(path, 'r') as handle:
				cached = handle.readlines()
			gender_substring.lines = cached

		hit = next(
			(row for row in cached if search_substring[2:].lower() in row.lower()),
			None,
		)
		if hit is not None:
			return hit[0]
	except FileNotFoundError:
		print(f'The file {path} was not found.')
	except Exception as e:
		print(f'An error occurred: {e}')

	return 'x' # not found, unknown gender

# Lines of the gender file, filled on first use.
gender_substring.lines = None

def get_gender(firstnames, surname, call, verbose=0):
	"""Determine the gender marker for a person from the '.gender' file.

	The '.gender' file is expected next to this script. Only the first of
	possibly several first names is looked up.

	Args:
		firstnames: One or more first names separated by blanks.
		surname: Surname, used only in the diagnostic message.
		call: Call sign, used only in the diagnostic message.
		verbose: If greater than 0, report every name that was not found.

	Returns:
		The marker returned by ``gender_substring``, or 'x' when the
		'.gender' file does not exist or the name was not found.
	"""
	genderfile = '.gender'
	script_dir = os.path.dirname(os.path.abspath(__file__))
	gpath = os.path.join(script_dir, genderfile)
	if not os.path.exists(gpath):
		return 'x'

	# Only the first first name is checked, which matters for multiple first names.
	firstname = firstnames.split(' ', 1)[0]
	gender = gender_substring(gpath, firstname, verbose=0)
	if gender == 'x' and verbose > 0:
		get_gender.cnt += 1
		print(f'({get_gender.cnt}){call} "{firstname}" [{firstnames} {surname}] not found in file {genderfile} - gender "x" is set.')
	return gender

# Running number of names reported as not found.
get_gender.cnt = 0

def call_split_name(fullname, call, verbose):
	"""Split "<surname> <first names>" into its parts and look up the gender.

	Surnames that start with one of the particles "de", "el", "van", "von"
	or "della" (case-insensitive) keep the following word as part of the
	surname, e.g. "de Lijezer", "van Dijk", "el Shamaa", "della Rowere".
	"van der", "von der" and "van den" take one more word, e.g.
	"van der Meulen". The particle is stored in lower case.

	Args:
		fullname: Name as found in the callbook, surname first.
		call: Call sign, passed on to ``get_gender``.
		verbose: If greater than 0, report surnames joined with a particle.

	Returns:
		Tuple ``(firstname, surname, gender)``; ``firstname`` is '<unknown>'
		when nothing follows the surname.
	"""
	assert(len(fullname) > 1)

	name = fullname.split(' ', 1)
	surname = name[0]
	particle = surname.lower()

	if particle in ('de', 'el', 'van', 'von', 'della'):
		# Skip the particle and the single blank behind it, then split again.
		name = fullname[len(particle) + 1:].split(' ', 1)
		surname = particle + ' ' + name[0]
		if particle in ('van', 'von') and surname.lower() in ('van der', 'von der', 'van den'):
			# Two-word particle: 7 characters plus the blank behind it.
			name = fullname[8:].split(' ', 1)
			surname = surname.lower() + ' ' + name[0]
		if verbose > 0:
			print(f'## {fullname} --> {surname} ##')

	firstname = name[1] if len(name) > 1 else '<unknown>'

	# The heuristic "call suffix starting with Y means YL" is not applied;
	# the gender always comes from the '.gender' lookup.
	gender = get_gender(firstname, surname, call, verbose)

	return firstname, surname, gender

def fix_typo(call, fullname, verbose=1):
	"""Correct a known typo in ``fullname`` using the file '.typo_callbook'.

	The file is expected next to this script. It is read once and cached in
	``fix_typo.lines``. Scanning starts at the fifth line of the file. The
	first line starting with '*' is a header with exactly four words; the
	column positions of its last three words are cached in ``fix_typo.spaces``
	and used to cut the surname and first name out of the entry lines.

	Every later line that holds ``call`` in columns 2-7 is handled according
	to its first character:
	  '#'  comment, only printed when verbose > 1
	  'F'  fix: the name from the file replaces ``fullname`` if exactly one
	       of first name / surname differs
	  'X'  exchange: the name from the file replaces ``fullname`` if it is
	       exactly ``fullname`` with first name and surname swapped

	Args:
		call: Call sign of the current record.
		fullname: Name as read from the callbook.
		verbose: Verbosity level controlling the diagnostic output.

	Returns:
		The corrected name, or ``fullname`` unchanged when the file does not
		exist, holds no applicable entry, or an error occurred (the error is
		printed, not raised).
	"""
	fixtypofile = '.typo_callbook'
	path = os.path.join(os.path.dirname(os.path.abspath(__file__)), fixtypofile)
	if os.path.exists(path):
		try:
			# Load and (optionally) echo the file on first use only.
			if not fix_typo.lines:
				with open(path, 'r') as file:
					fix_typo.lines = file.readlines()
					if verbose > 0:
						print(f'File "{fixtypofile}":')
						for line in fix_typo.lines:
							print(f'>> {line.rstrip()}')
						print('>> ** EOF **')

			for line in fix_typo.lines[4:]: # the first four lines of the file are skipped
				if len(fix_typo.spaces) == 0: # not initialized
					# Still looking for the header line that defines the columns.
					if line[0] == '*':
						words = line.split()
						assert len(words) == 4 # i.e. '*, call, nachname, vorname'
						# Start columns of call, surname (Nachname) and first name (Vorname).
						fix_typo.spaces = [line.index(words[1]), line.index(words[2]), line.index(words[3])]
				else:
					# Substring test: the call only has to occur somewhere in columns 2-7.
					if call in line[2:8]:
						if verbose > 1:
							print(f'Call: {call} found')
						match line[0]:
							case '#':
								# Comment entry, nothing to correct.
								if verbose > 1:
									print(line.rstrip())
							case 'F':
								if verbose > 0:
									print(line.rstrip())
									print(fullname)
								firstname1, surname1, gender1 = call_split_name(fullname, call, 0)
								# Surname column (right-stripped) + blank + rest of the line without its last character.
								fullname2 = line[fix_typo.spaces[1]:fix_typo.spaces[2]-1].rstrip() + ' ' + line[fix_typo.spaces[2]:-1]
								if verbose > 0:
									print(fullname2)
								firstname2, surname2, gender2 = call_split_name(fullname2, call, 0)
								# Hardening: at a minimum, either the first names or the surnames must fit
								fix_cnt = 0
								if (firstname1 != firstname2):
									fix_cnt += 1
								if (surname1 != surname2):
									fix_cnt += 1
								if fix_cnt == 0:
									# The callbook already holds the corrected name.
									print(f'It is fixed! You can remove the line with the item {call} from the file {fixtypofile}!')
								elif fix_cnt > 1:
									# Both parts differ: the entry is not applied.
									print(f'Something went wrong, there are several bugs. Check line with call {call} in file {fixtypofile}!')
								else:
									fullname = fullname2
							case 'X': # exchange the surname with firstname
								if verbose > 0:
									print(line.rstrip())
									print(fullname)
								firstname1, surname1, gender1 = call_split_name(fullname, call, 0)
								# Same extraction as in case 'F'.
								fullname2 = line[fix_typo.spaces[1]:fix_typo.spaces[2]-1].rstrip() + ' ' + line[fix_typo.spaces[2]:-1]
								if verbose > 0:
									print(fullname2)
								firstname2, surname2, gender2 = call_split_name(fullname2, call, 0)
								# fix_cnt is not used in this branch.
								fix_cnt = 0
								if (firstname1 ==  firstname2) and (surname1 == surname2):
									# The callbook already holds the corrected name.
									print(f'It is fixed! You can remove the line with the item {call} from the file {fixtypofile}!')
								elif (firstname1 !=  surname2) or (surname1 != firstname2):
									# Not a pure swap of first name and surname: the entry is not applied.
									print(f'Something went wrong, there are several bugs. Check line with call {call} in file {fixtypofile}!')
								else:
									fullname = fullname2

		except FileNotFoundError:
			print(f'The file {path} was not found.')
		except Exception as e:
			# Also reached by the assert above and by errors from call_split_name.
			print(f'An error occurred: {e}')

	return fullname
# Cached lines of '.typo_callbook', filled on first use.
fix_typo.lines = None
# Cached start columns [call, surname, first name] taken from the header line.
fix_typo.spaces = []

def call_data_record(line, mod_date, verbose):
	"""Parse one text line of the callbook table and report its fields.

	The line is split at runs of 4 to 65 blanks into exactly five fields:
	call sign, full name, location, address and permit class. The name is
	passed through ``fix_typo``. For club stations it is additionally
	cleaned of an unbalanced quote and expanded from the file
	'.callbook_club' next to this script, if that file exists. For all other
	records whose name does not start with '*', the name is split into
	first name and surname and the gender is looked up.

	Args:
		line: One stripped table line from the PDF text.
		mod_date: Modification date of the PDF (currently unused).
		verbose: Verbosity level; the parsed fields are printed when > 1,
			the raw fields when > 2.

	Raises:
		ValueError: If the line does not consist of five fields or the first
			field is not a call sign of the form OE<digit><1-3 letters>.
	"""
	# Never split addresses containing 2 or 3 spaces. Field-wise regular
	# expressions did not work reliably, so the columns are separated at
	# long runs of blanks instead.
	records = re.split(r'[ ]{4,65}', line)

	if verbose > 2 :
		print(f'Record length: {len(records)}')

		for m in records:
			print(m)

	# HARDENING: explicit exceptions instead of assert, so that the checks
	# survive 'python -O' and produce a readable error message.
	if len(records) != 5:
		raise ValueError(f'Expected 5 fields but found {len(records)} in line: {line!r}')
	call, fullname, location, address, permit_class = records

	# The whole field must be a call sign, not merely start like one.
	if re.fullmatch(r'OE[0-9][A-Z]{1,3}', call) is None:
		raise ValueError(f'Invalid call sign {call!r} in line: {line!r}')

	fullname = fix_typo(call, fullname, verbose)

	firstname = surname = None
	if is_clubstation(call):
		# A name with an odd number of quotation marks loses the first one;
		# this was only found at club stations.
		fullname = remove_first_quote_if_odd(fullname, verbose)
		clubstationfile = '.callbook_club'
		if verbose > 1:
			print(f'Call: {call}, Name: {fullname}')
		path = os.path.join(os.path.dirname(os.path.abspath(__file__)), clubstationfile)
		if os.path.exists(path):
			fullname = replace_substring_with_line(path, fullname, verbose)
		gender = '*'
	elif fullname.startswith('*'):
		# startswith() is safe for an empty name, unlike fullname[0].
		gender = '*'
	else: # Try to split the YL or OMs Name, guess the gender
		firstname, surname, gender = call_split_name(fullname, call, verbose)

	if verbose > 1:
		if gender == '*':
			print(f'Call: {call}, Name: {fullname}, Gender: {gender}')
		else:
			print(f'Call: {call}, First Name: {firstname}, Surname: {surname}, Gender: {gender}')

		print(f'Location: {location}, Address: {address}, Permit: {permit_class}')


def call_analyse_pdf(file, verbose):
	"""Read the callbook PDF and hand every table line to ``call_data_record``.

	On each page the text is extracted in layout mode; the first three and
	the last two lines of the page are skipped and every remaining non-empty
	line is processed as one record.

	Args:
		file: Path of (or file object for) the PDF, passed to ``PdfReader``.
		verbose: Verbosity level; > 0 prints the document metadata,
			>= 2 additionally prints every processed line.
	"""
	reader = PdfReader(file)
	# The document information is optional: metadata is None if the PDF has none.
	meta = reader.metadata
	mod_date = meta.modification_date if meta is not None else None

	if verbose:
		print(verbose)
		print('   Pages:', len(reader.pages))
		if meta is None:
			print('(no document metadata present)')
		else:
			# All of the following could be None!
			print(f'  Author: {meta.author}')
			print(f' Creator: {meta.creator}')
			print(f'Producer: {meta.producer}')
			print(f' Subject: {meta.subject}')
			print(f'   Title: {meta.title}')
			print(f' Created: {meta.creation_date}')
			print(f'Modified: {mod_date}')

	for page in reader.pages:
		page_text = page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False)
		# [3:-2] drops the first three and the last two lines of every page.
		for line in page_text.strip().splitlines()[3:-2]:
			line = line.strip()
			if not line:
				# A blank line carries no record.
				continue

			if verbose >= 2:
				print(line)
			call_data_record(line, mod_date, verbose)

if __name__ == '__main__':
	# Script entry point: parse the command line, obtain the PDF and analyse it.
	# Exit status: 0 on success, 1 on any unexpected error; call_website()
	# may terminate the program with its own status via sys.exit().
	args = call_parser()

	try:
		filename = call_website(**vars(args))
		if args.verbose > 1:
			print(f'Filename: {filename}')
		call_analyse_pdf(filename, args.verbose)
	except Exception as e:
		print(f'Error: {e}', file=sys.stderr)
		sys.exit(1)
	else:
		sys.exit(0)
UPD README.md and README.ARCH.md 2024-06-13 03:03:39 +02:00			`#!/usr/bin/env python3`

			`import argparse`
			`import os`
			`import sys`
ADD callbook.py init release 1.0.0 2024-06-14 00:20:26 +02:00			`import time`
Add parser, still work in progress 2024-06-16 07:38:30 +02:00			`import pypdf`
			`#from PyPDF2 import PdfReader`
			`from pypdf import PdfReader`
			`import re # regular expression`
			`import pandas as pd`
UPD README.md and README.ARCH.md 2024-06-13 03:03:39 +02:00
ADD callbook.py init release 1.0.0 2024-06-14 00:20:26 +02:00			`__version__ = '1.0.0'`
			`__website__ = 'https://www.fb.gv.at/Funk/amateurfunkdienst.html'`
UPD README.md and README.ARCH.md 2024-06-13 03:03:39 +02:00
			`from selenium import webdriver`
			`from selenium.webdriver.common.by import By`
ADD callbook.py init release 1.0.0 2024-06-14 00:20:26 +02:00			`from selenium.webdriver.chrome.service import Service as ChromiumService`
UPD README.md and README.ARCH.md 2024-06-13 03:03:39 +02:00
ADD callbook.py init release 1.0.0 2024-06-14 00:20:26 +02:00			`def call_description():`
			`print(f'Download and Parse the Austrian Callbook Version {__version__}')`
UPD README.md and README.ARCH.md 2024-06-13 03:03:39 +02:00
ADD callbook.py init release 1.0.0 2024-06-14 00:20:26 +02:00			`def call_parser():`
			`parser = argparse.ArgumentParser(`
			`description='Download and Parse the Austrian Callbook',`
			`epilog=f'''`
			`Written by Thomas Kuschel,`
			`Version {__version__}`
			`'''`
			`)`
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`parser.add_argument('-i', '--interactive', action='store_true', default=False)`
			`# parser.add_argument('-s', '--server' default=__website__, required=False)`
			`parser.add_argument('-V', '--version', action='version', version='{} {}'.format(os.path.split(__file__)[1],__version__))`
			`parser.add_argument('-v', '--verbose', action='append_const', const = 1)`
			`parser.add_argument('-p', '--path', default='Rufzeichenliste_AT_Stand_010624.pdf', help= 'skip the download if the specified path to a PDF file exists')`
ADD typo fixing file 2024-06-17 19:16:28 +02:00			`# parser.add_argument('-t', '--type', default='' , help='specify the output, supported types are [ CSV \| JSON ]') # not implemented yet`
			`parser.add_argument('-o', '--output', default='', help='specify the file where the data are written to, default stdout')`
			`parser.add_argument('-m', '--mariadb', help='SQL interface to MariaDB (MySql) format "<IP-Address>:<Port> <User> <Passwd>" or defined in .config')`
ADD callbook.py init release 1.0.0 2024-06-14 00:20:26 +02:00			`parser.add_argument('url', metavar='URL', nargs='?', default=__website__)`

ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`opt = parser.parse_args()`
			`opt.verbose = 0 if opt.verbose is None else sum(opt.verbose)`
			`return opt`

ADD typo fixing file 2024-06-17 19:16:28 +02:00			`def call_website(url,verbose,path='',interactive=False,output='',mariadb=''):`
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00
			`if path:`
			`if os.path.exists(path):`
			`return path`
			`else:`
			`print(f'The given path "{path}" does not exist.')`
			`sys.exit(3)`
UPD README.md and README.ARCH.md 2024-06-13 03:03:39 +02:00
			`if(interactive):`
			`print('Interactive')`
ADD callbook.py init release 1.0.0 2024-06-14 00:20:26 +02:00			`driver=webdriver.Chrome()`
			`else:`
UPD README.md and README.ARCH.md 2024-06-13 03:03:39 +02:00			`print('Headless Script')`
ADD callbook.py init release 1.0.0 2024-06-14 00:20:26 +02:00			`options = webdriver.ChromeOptions()`
			`options.add_argument('--headless')`
			`options.add_argument('--no-sandbox')`
			`options.add_argument('--disable-dev-shm-usage')`
			`driver = webdriver.Chrome(options=options)`

UPD README.md and README.ARCH.md 2024-06-13 03:03:39 +02:00			`driver.get(url)`
ADD callbook.py init release 1.0.0 2024-06-14 00:20:26 +02:00			`print(driver.title)`
ADD get fileattribute, file basename 2024-06-15 23:40:34 +02:00			`# elements = driver.find_elements(By.XPATH,'//a[contains(@href,"Rufzeichen")]')`
			`elements = driver.find_elements(By.PARTIAL_LINK_TEXT,"Rufzeichen")`
ADD callbook.py init release 1.0.0 2024-06-14 00:20:26 +02:00
			`if elements:`
			`element = elements[0]`
ADD get fileattribute, file basename 2024-06-15 23:40:34 +02:00			`href = element.get_attribute('href')`
			`filename = element.click() # take the first one`
ADD callbook.py init release 1.0.0 2024-06-14 00:20:26 +02:00			`else:`
			`print('Sorry, no Link containing "Rufzeichen" found.')`
			`driver.close()`
			`sys.exit(2)`
UPD README.md and README.ARCH.md 2024-06-13 03:03:39 +02:00
ADD callbook.py init release 1.0.0 2024-06-14 00:20:26 +02:00			`print(element.text)`
ADD get fileattribute, file basename 2024-06-15 23:40:34 +02:00			`# print(href)`
Add more time when interactive 2024-06-15 23:44:41 +02:00			`if(interactive):`
			`time.sleep(300)`
Add parser, still work in progress 2024-06-16 07:38:30 +02:00			`else:`
ADD typo fixing file 2024-06-17 19:16:28 +02:00			`time.sleep(4)`
UPD README.md and README.ARCH.md 2024-06-13 03:03:39 +02:00
			`driver.close()`
ADD get fileattribute, file basename 2024-06-15 23:40:34 +02:00			`return os.path.basename(href)`
UPD README.md and README.ARCH.md 2024-06-13 03:03:39 +02:00
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`def remove_first_quote_if_odd(text, verbose = 0):`
			`double_quote_cnt = text.count('"')`
			`# single_quote_cnt = text.count("'")`

			`if (double_quote_cnt % 2 != 0): # or (single_quote_cnt % 2 != 0):`
			`# Find and remove the first quote`
			`for i, char in enumerate(text):`
			`if char in ['"']: # ['"', "'"]:`
ADD typo fixing file 2024-06-17 19:16:28 +02:00			`if (verbose > 1):`
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`print(text)`
			`text = text[:i] + text[i+1:]`
ADD typo fixing file 2024-06-17 19:16:28 +02:00			`if (verbose > 1):`
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`print(text)`
			`break`
			`return text`

			`def is_clubstation(call):`
			`assert(len(call) > 3)`
ADD typo fixing file 2024-06-17 19:16:28 +02:00			`if call[3].upper() == 'X' or call.upper() == 'OE5SIX': # special case with OE5SIX (Clubstation)`
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`return True`
ADD typo fixing file 2024-06-17 19:16:28 +02:00
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`return False`

ADD typo fixing file 2024-06-17 19:16:28 +02:00
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`def replace_substring_with_line(path, search_substring, verbose=0):`
Improve language 2024-06-17 20:59:16 +02:00
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`try:`
ADD typo fixing file 2024-06-17 19:16:28 +02:00			`if not replace_substring_with_line.lines:`
			`with open(path, 'r') as file:`
			`replace_substring_with_line.lines = file.readlines()`

			`for line in replace_substring_with_line.lines:`
			`if search_substring[0:46].lower() in line.lower():`
Improve language 2024-06-17 20:59:16 +02:00			`return line.strip()`
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00
			`except FileNotFoundError:`
			`print(f'The file {path} was not found.')`
			`except Exception as e:`
			`print(f'An error occurred: {e}')`

			`return search_substring`

ADD typo fixing file 2024-06-17 19:16:28 +02:00			`replace_substring_with_line.lines = None`

			`def gender_substring(path, search_substring, verbose=0):`
			`try:`
			`if not gender_substring.lines:`
			`with open(path, 'r') as file:`
			`gender_substring.lines = file.readlines()`

			`for line in gender_substring.lines:`
			`if search_substring[2:].lower() in line.lower():`
			`return line[0]`
			`except FileNotFoundError:`
			`print(f'The file {path} was not found.')`
			`except Exception as e:`
			`print(f'An error occurred: {e}')`

			`return 'x' # not found, unknown gender`

			`gender_substring.lines = None`

			`def get_gender(firstnames, surname, call, verbose=0):`

			`# load the .gender file:`
			`genderfile = '.gender'`
			`gender = 'x'`
			`gpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), genderfile)`
			`if os.path.exists(gpath):`
			`firstname = firstnames.split(' ', 1)[0]`
			`gender = gender_substring(gpath, firstname, verbose=0) # only check 1st/firstname of name, important when there are more than 1 firstnames`
			`if gender == 'x':`
			`if verbose > 0:`
			`get_gender.cnt += 1`
			`print(f'({get_gender.cnt}){call} "{firstname}" [{firstnames} {surname}] not found in file {genderfile} - gender "x" is set.')`
			`return gender`

			`get_gender.cnt = 0`

			`def call_split_name(fullname, call, verbose):`

			`assert(len(fullname) > 1)`

			`name = fullname.split(' ', 1)`
			`surname = name[0]`
			`# several special cases like surname "de Lijezer", "van Dijk", "el Shamaa", etc.`
			`match surname.lower():`
			`case 'de' \| 'el':`
			`name = fullname[3:].split(' ',1)`
			`surname = surname.lower() + ' ' + name[0]`
			`if verbose > 0:`
			`print(f'## {fullname} --> {surname} ##')`

			`case 'van' \| 'von' :`
			`name = fullname[4:].split(' ',1)`
			`surname = surname.lower() + ' ' + name[0]`
			`if surname.lower() in ['van der', 'von der', 'van den']: # e.g. "van der Meulen", "Walther von der Vogelweide", "Annie van den Berg"`
			`name = fullname[8:].split(' ',1)`
			`surname = surname.lower() + ' ' + name[0]`
			`if verbose > 0:`
			`print(f'## {fullname} --> {surname} ##')`
			`case 'della' : # Ancient Italian noble family "della Rowere"`
			`name = fullname[6:].split(' ',1)`
			`surname = surname.lower() + ' ' + name[0]`
			`if verbose > 0:`
			`print(f'## {fullname} --> {surname} ##')`

			`if len(name) > 1:`
			`firstname = name[1]`
			`else:`
			`firstname = '<unknown>'`

			`# In Austria the call suffix starting with Y is an YL (young lady)`
			`# if call[3].upper() == 'Y':`
			`if False:`
			`gender = 'f'`
			`else:`
			`gender = get_gender(firstname, surname, call, verbose)`

			`return firstname, surname, gender`

			`def fix_typo(call, fullname, verbose=1):`
			`fixtypofile = '.typo_callbook'`
			`path = os.path.join(os.path.dirname(os.path.abspath(__file__)), fixtypofile)`
			`if os.path.exists(path):`
			`try:`
			`if not fix_typo.lines:`
			`with open(path, 'r') as file:`
			`fix_typo.lines = file.readlines()`
CHG verbose > 0 printing call found, using typo file, print typo file 2024-06-17 20:01:16 +02:00			`if verbose > 0:`
			`print(f'File "{fixtypofile}":')`
			`for line in fix_typo.lines:`
			`print(f'>> {line.rstrip()}')`
			`print('>> EOF ')`
ADD typo fixing file 2024-06-17 19:16:28 +02:00
			`for line in fix_typo.lines[4:]: # starting with line 4`
			`if len(fix_typo.spaces) == 0: # not initialized`
			`if line[0] == '*':`
			`words = line.split()`
			`assert len(words) == 4 # i.e. '*, call, nachname, vorname'`
			`fix_typo.spaces = [line.index(words[1]), line.index(words[2]), line.index(words[3])]`
			`else:`
			`if call in line[2:8]:`
Improve language 2024-06-17 20:59:16 +02:00			`if verbose > 1:`
CHG verbose > 0 printing call found, using typo file, print typo file 2024-06-17 20:01:16 +02:00			`print(f'Call: {call} found')`
ADD typo fixing file 2024-06-17 19:16:28 +02:00			`match line[0]:`
			`case '#':`
			`if verbose > 1:`
			`print(line.rstrip())`
			`case 'F':`
			`if verbose > 0:`
			`print(line.rstrip())`
Improve language 2024-06-17 20:59:16 +02:00			`print(fullname)`
ADD typo fixing file 2024-06-17 19:16:28 +02:00			`firstname1, surname1, gender1 = call_split_name(fullname, call, 0)`
			`fullname2 = line[fix_typo.spaces[1]:fix_typo.spaces[2]-1].rstrip() + ' ' + line[fix_typo.spaces[2]:-1]`
			`if verbose > 0:`
			`print(fullname2)`
			`firstname2, surname2, gender2 = call_split_name(fullname2, call, 0)`
			`# Hardening: at a minimum, either the firstnames or the surenames must fit`
			`fix_cnt = 0`
			`if (firstname1 != firstname2):`
			`fix_cnt += 1`
			`if (surname1 != surname2):`
			`fix_cnt += 1`
			`if fix_cnt == 0:`
			`print(f'It is fixed! You can remove the line with the item {call} from the file {fixtypofile}!')`
			`elif fix_cnt > 1:`
			`print(f'Something went wrong, there are several bugs. Check line with call {call} in file {fixtypofile}!')`
			`else:`
			`fullname = fullname2`
			`case 'X': # exchange the surname with firstname`
			`if verbose > 0:`
			`print(line.rstrip())`
Improve language 2024-06-17 20:59:16 +02:00			`print(fullname)`
ADD typo fixing file 2024-06-17 19:16:28 +02:00			`firstname1, surname1, gender1 = call_split_name(fullname, call, 0)`
			`fullname2 = line[fix_typo.spaces[1]:fix_typo.spaces[2]-1].rstrip() + ' ' + line[fix_typo.spaces[2]:-1]`
			`if verbose > 0:`
			`print(fullname2)`
			`firstname2, surname2, gender2 = call_split_name(fullname2, call, 0)`
			`fix_cnt = 0`
			`if (firstname1 == firstname2) and (surname1 == surname2):`
			`print(f'It is fixed! You can remove the line with the item {call} from the file {fixtypofile}!')`
			`elif (firstname1 != surname2) or (surname1 != firstname2):`
			`print(f'Something went wrong, there are several bugs. Check line with call {call} in file {fixtypofile}!')`
			`else:`
			`fullname = fullname2`

			`except FileNotFoundError:`
			`print(f'The file {path} was not found.')`
			`except Exception as e:`
			`print(f'An error occurred: {e}')`

			`return fullname`
			`fix_typo.lines = None`
			`fix_typo.spaces = []`
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00
			`def call_data_record(line, mod_date, verbose):`

			`# we have to split the record with a cost-intensive regular expression`
			`# record = re.split('OE[0-9][A-Z]{1,3}[ \t]{3,20}',line) # this does not work 100%`
			`# record = re.findall(r'(OE[0-9][A-Z]{1,3})[ \t]{2,12}([A-ZÄÖÜ].+[ ]?.[ ]?.[ ]?.*)[ \t]{3,30}(.{3,30})[ \t]{3,30}([1,3,4])', line)`
			`# record = re.search(r'(OE[0-9][A-Z]{1,3})[ ]{2,12}([. ]+)[ ]{3,50}([. ]+)[ ]{3,50}([1-4])', line)`
			`# record = re.search(r'^(OE[0-9][A-Z]{1,3})[ \t]{2,20}([\w ]{1,12})[ ]{3,50}(.*)([1-4]{1})$', line)`

			`# Never split Addresses containing 2 or 3 spaces, also several records contain no address or no location`
			`records = re.split(r'[ ]{4,65}', line)`
			`# [records for record in records]`

			`if verbose > 2 :`
			`print(f'Record length: {len(records)}')`

			`for m in records:`
			`print(m)`

			`# HARDENING:`
			`assert(len(records) == 5)`
			`# OE Call:`
			`call = records[0]`
			`match = re.search(r'^(OE[0-9][A-Z]{1,3})', call)`
			`assert(match.string == call)`
			`fullname = records[1]`
ADD typo fixing file 2024-06-17 19:16:28 +02:00			`location = records[2]`
			`address = records[3]`
			`permit_class = records[4]`
			`fullname = fix_typo(call, fullname, verbose)`
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`# If there is a clubstation`
			`if is_clubstation(call):`
			`# Name starting with only one quotation marks e.g. " -- remove that one:`
			`fullname = remove_first_quote_if_odd(fullname, verbose) # only found @ clubstations`
			`clubstationfile = '.callbook_club'`
ADD typo fixing file 2024-06-17 19:16:28 +02:00			`if verbose > 1:`
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`print(f'Call: {call}, Name: {fullname}')`
			`path = os.path.join(os.path.dirname(os.path.abspath(__file__)), clubstationfile)`
			`if os.path.exists(path):`
			`fullname = replace_substring_with_line(path, fullname, verbose)`
ADD typo fixing file 2024-06-17 19:16:28 +02:00			`gender = '*'`
			`elif fullname[0] == '*':`
			`gender = '*'`
			`else: # Try to split the YL or OMs Name, guess the gender`
			`firstname, surname, gender = call_split_name(fullname, call, verbose)`
			`if verbose > 1:`
			`if gender == '*':`
			`print(f'Call: {call}, Name: {fullname}, Gender: {gender}')`
			`else:`
			`print(f'Call: {call}, First Name: {firstname}, Surname: {surname}, Gender: {gender}')`
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00
ADD typo fixing file 2024-06-17 19:16:28 +02:00			`print(f'Location: {location}, Address: {address}, Permit: {permit_class}')`
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00

			`def call_analyse_pdf(file, verbose):`
Add parser, still work in progress 2024-06-16 07:38:30 +02:00
			`# Define a regular expression to match tables`

			`reader = PdfReader(file)`
			`meta = reader.metadata`
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`if verbose:`
			`print(verbose)`
			`print(' Pages:', len(reader.pages))`
			`# All of the following could be None!`
			`print(f' Author: {meta.author}')`
			`print(f' Creator: {meta.creator}')`
			`print(f'Producer: {meta.producer}')`
			`print(f' Subject: {meta.subject}')`
			`print(f' Title: {meta.title}')`
			`print(f' Created: {meta.creation_date}')`
			`print(f'Modified: {meta.modification_date}')`
ADD typo fixing file 2024-06-17 19:16:28 +02:00
Add parser, still work in progress 2024-06-16 07:38:30 +02:00			`for page in reader.pages:`
			`page_text = page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False)`
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`lines = page_text.strip().splitlines()`
			`for line in lines[3:-2]:`
			`line = line.strip()`
ADD typo fixing file 2024-06-17 19:16:28 +02:00
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`if verbose >= 2:`
			`print(line)`
			`call_data_record(line, meta.modification_date,verbose)`
Add parser, still work in progress 2024-06-16 07:38:30 +02:00
UPD README.md and README.ARCH.md 2024-06-13 03:03:39 +02:00			`if __name__ == '__main__':`
ADD callbook.py init release 1.0.0 2024-06-14 00:20:26 +02:00			`# call_description()`
			`args = call_parser()`
ADD typo fixing file 2024-06-17 19:16:28 +02:00
UPD README.md and README.ARCH.md 2024-06-13 03:03:39 +02:00			`try:`
			`filename = call_website(**vars(args))`
ADD typo fixing file 2024-06-17 19:16:28 +02:00			`if args.verbose > 1:`
			`print(f'Filename: {filename}')`
ADD .callbook_club file to expand clubstation names 2024-06-16 17:00:46 +02:00			`call_analyse_pdf(filename,args.verbose)`
UPD README.md and README.ARCH.md 2024-06-13 03:03:39 +02:00			`sys.exit(0)`
			`except Exception as e:`
			`print('Error: {}'.format(e), file=sys.stderr)`
			`sys.exit(1)`