2024-06-13 03:03:39 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
2024-06-23 15:21:15 +02:00
|
|
|
__version__ = '1.0.0'
|
|
|
|
__website__ = 'https://www.fb.gv.at/Funk/amateurfunkdienst.html'
|
|
|
|
|
2024-06-13 03:03:39 +02:00
|
|
|
import argparse
|
|
|
|
import os
|
2024-06-19 19:51:08 +02:00
|
|
|
import mariadb
|
2024-06-13 03:03:39 +02:00
|
|
|
import sys
|
2024-06-14 00:20:26 +02:00
|
|
|
import time
|
2024-06-16 07:38:30 +02:00
|
|
|
import pypdf
|
|
|
|
from pypdf import PdfReader
|
|
|
|
import re # regular expression
|
2024-06-19 19:51:08 +02:00
|
|
|
import datetime
|
2024-06-23 15:21:15 +02:00
|
|
|
import configparser
|
2024-06-13 03:03:39 +02:00
|
|
|
|
|
|
|
from selenium import webdriver
|
|
|
|
from selenium.webdriver.common.by import By
|
2024-06-14 00:20:26 +02:00
|
|
|
from selenium.webdriver.chrome.service import Service as ChromiumService
|
2024-06-13 03:03:39 +02:00
|
|
|
|
2024-06-14 00:20:26 +02:00
|
|
|
def call_description():
    """Print the one-line program banner including the current version."""
    banner = f'Download and Parse the Austrian Callbook Version {__version__}'
    print(banner)
|
2024-06-13 03:03:39 +02:00
|
|
|
|
2024-06-23 15:21:15 +02:00
|
|
|
def call_configparser():
    """Load ``config.ini`` from the script directory, creating it if absent.

    When the file cannot be read, a default configuration with the sections
    ``Common`` and ``MariaDB`` is written next to the script and used.

    Returns:
        configparser.ConfigParser: the loaded (or freshly created) config.

    Exits:
        8 if the file cannot be created because its directory is missing,
        9 on any other error while reading or writing the configuration.
    """
    config = configparser.ConfigParser()
    try:
        config_path = get_active_path('config.ini')
        # ConfigParser.read() returns the list of files it could parse;
        # an empty list means config.ini does not exist (or is unreadable).
        if not config.read(config_path):
            print(f'The file {config_path} was not found. We create a new config file.')
            # Non-string values are converted with str() by ConfigParser.
            config['Common'] = {
                'description': 'Download and Parse the Austrian Callbook',
                'author': 'Thomas Kuschel, OE3TKT (OExTKT)',
                'version': __version__,
                'created': datetime.datetime.now(),
            }
            config['MariaDB'] = {
                'user': 'om',
                'password': 'oe3tkt',
                'host': '127.0.0.1',
                'port': 3306,
                'database': 'callbook',
            }
            # Use a separate name for the handle so the path is not shadowed.
            with open(config_path, 'w') as handle:
                config.write(handle, False)  # False: no spaces around '='
            print('Configuration file "config.ini" created.')
    except FileNotFoundError as e:
        # Previously a silent exit; report why before leaving with code 8.
        print('Error: {}'.format(e), file=sys.stderr)
        sys.exit(8)
    except Exception as e:
        print('Error: {}'.format(e), file=sys.stderr)
        sys.exit(9)
    return config
|
2024-06-23 15:21:15 +02:00
|
|
|
|
2024-06-14 00:20:26 +02:00
|
|
|
def call_parser():
    """Build the command line interface and return the parsed options.

    The returned namespace has ``verbose`` normalised to an int (number of
    ``-v`` flags) and a bare ``--path`` file name resolved relative to the
    directory of this script.
    """
    epilog_text = f'''
        Written by Thomas Kuschel,
        Version {__version__}
        '''
    cli = argparse.ArgumentParser(
        description='Download and Parse the Austrian Callbook',
        epilog=epilog_text,
    )

    cli.add_argument('-i', '--interactive', action='store_true', default=False)
    cli.add_argument(
        '-V', '--version',
        action='version',
        version='{} {}'.format(os.path.basename(__file__), __version__),
    )
    cli.add_argument('-v', '--verbose', action='append_const', const=1)
    # Current list at the time of writing: Rufzeichenliste_AT_Stand_010624.pdf
    cli.add_argument(
        '-p', '--path',
        default='Rufzeichenliste_AT_Stand_010624.pdf',
        help='skip the download if the specified path to a PDF file exists',
    )
    cli.add_argument(
        '-o', '--output',
        default='',
        help='specify the file where the data are written to, default stdout',
    )
    cli.add_argument(
        '-m', '--mariadb',
        help='SQL interface to MariaDB (MySql) format "<IP-Address>:<Port> <User> <Passwd>" or defined in .config',
    )
    cli.add_argument('url', metavar='URL', nargs='?', default=__website__)

    opt = cli.parse_args()

    # append_const yields None (flag absent) or a list of ones.
    opt.verbose = sum(opt.verbose or [])

    # A plain file name (no directory part) is looked up beside the script.
    if opt.path and not os.path.dirname(opt.path):
        opt.path = get_active_path(opt.path)
    return opt
|
|
|
|
|
2024-06-17 19:16:28 +02:00
|
|
|
def call_website(url,verbose,path='',interactive=False,output='',mariadb=''):
    """Return the callbook PDF file name, downloading it via Chrome if needed.

    Args:
        url: page that contains a link whose text includes "Rufzeichen".
        verbose: verbosity level (accepted for the ``**vars(args)`` call,
            not used here).
        path: if non-empty, no browser is started; the path is returned when
            it exists, otherwise the program exits with code 3.
        interactive: open a visible browser and wait 300 s instead of 4 s.
        output, mariadb: accepted for the ``**vars(args)`` call, unused here.

    Returns:
        ``path`` if given and existing, else the base name of the link target.

    Exits:
        2 if no matching link is found, 3 if ``path`` does not exist.
    """
    if path:
        if os.path.exists(path):
            return path
        print(f'The given path "{path}" does not exist.')
        sys.exit(3)

    if interactive:
        print('Interactive')
        driver = webdriver.Chrome()
    else:
        print('Headless Script')
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        driver = webdriver.Chrome(options=options)

    # try/finally guarantees the browser session is torn down on every exit
    # path (exception, sys.exit, normal return).  quit() ends the whole
    # session, whereas close() only closes the current window.
    try:
        driver.get(url)
        print(driver.title)
        elements = driver.find_elements(By.PARTIAL_LINK_TEXT, "Rufzeichen")
        if not elements:
            print('Sorry, no Link containing "Rufzeichen" found.')
            sys.exit(2)

        element = elements[0]  # take the first one
        href = element.get_attribute('href')
        element.click()  # triggers the download in the browser
        print(element.text)

        # Give the browser time to finish the download (or the user time to
        # interact when running with a visible window).
        time.sleep(300 if interactive else 4)
    finally:
        driver.quit()

    return os.path.basename(href)
|
2024-06-13 03:03:39 +02:00
|
|
|
|
2024-06-16 17:00:46 +02:00
|
|
|
def remove_first_quote_if_odd(text, verbose = 0):
    """Drop the first double quote when ``text`` holds an odd number of them.

    With an even count (including zero) the text is returned unchanged.
    At ``verbose > 1`` the text is printed before and after the removal.
    """
    if text.count('"') % 2 == 0:
        return text

    repaired = text.replace('"', '', 1)  # only the first occurrence
    if verbose > 1:
        print(text)
        print(repaired)
    return repaired
|
|
|
|
|
|
|
|
def is_clubstation(call):
    """Tell whether ``call`` is a club station call sign.

    A call counts as a club station when its fourth character is ``X``
    (case-insensitive) or when it is the special case ``OE5SIX``.
    """
    assert len(call) > 3
    suffix_start = call[3]
    return suffix_start in ('x', 'X') or call.upper() == 'OE5SIX'
|
|
|
|
|
2024-06-17 19:16:28 +02:00
|
|
|
|
2024-06-16 17:00:46 +02:00
|
|
|
def replace_substring_with_line(path, search_substring, verbose=0):
    """Expand a (possibly truncated) name to the full line found in a file.

    The first 46 characters of ``search_substring`` are searched
    case-insensitively in every line of ``path``; the first matching line is
    returned without surrounding whitespace.

    Args:
        path: text file with one full name per line.
        search_substring: name to look up.
        verbose: unused, kept for a uniform call signature.

    Returns:
        The stripped matching line, or ``search_substring`` unchanged when
        nothing matches, the search string is empty, or the file cannot be
        read (an error message is printed in that case).
    """
    try:
        # Hoisted: the needle does not change while scanning the file.
        needle = search_substring[0:46].lower()
        if not needle:
            # '' is contained in every line and would match the first one.
            return search_substring

        # Cache the file content, but reload when a different file is asked
        # for, so that the cache never answers for the wrong path.
        if (replace_substring_with_line.lines is None
                or replace_substring_with_line.path != path):
            with open(path, 'r') as file:
                replace_substring_with_line.lines = file.readlines()
            replace_substring_with_line.path = path

        for line in replace_substring_with_line.lines:
            if needle in line.lower():
                return line.strip()

    except FileNotFoundError:
        print(f'The file {path} was not found.')
    except Exception as e:
        print(f'An error occurred: {e}')

    return search_substring


# Function attributes used as a one-file cache (content and its origin).
replace_substring_with_line.lines = None
replace_substring_with_line.path = None
|
|
|
|
|
|
|
|
def gender_substring(path, search_substring, verbose=0):
    """Look up the gender marker stored for a first name in a gender file.

    Each line of the file is read as ``<marker><separator><name>``: the
    first character is the marker and the text from position 2 on
    (whitespace stripped) is the name, compared exactly.

    Args:
        path: gender file to read.
        search_substring: first name to look up.
        verbose: unused, kept for a uniform call signature.

    Returns:
        The marker character of the first matching line (e.g. ``'f'`` or
        ``'m'``), or ``'x'`` when the name is empty, not found, or the file
        cannot be read (an error message is printed in that case).
    """
    if not search_substring:
        # An empty name would otherwise match blank lines of the file.
        return 'x'

    try:
        # Cache the file content, but reload when a different file is asked
        # for, so that the cache never answers for the wrong path.
        if gender_substring.lines is None or gender_substring.path != path:
            with open(path, 'r') as file:
                gender_substring.lines = file.readlines()
            gender_substring.path = path

        for line in gender_substring.lines:
            # Compare from position 2 on, ignoring spaces and the newline.
            if line[2:].strip() == search_substring:
                return line[0]  # the gender marker, i.e. 'f' or 'm'
    except FileNotFoundError:
        print(f'The file {path} was not found.')
    except Exception as e:
        print(f'An error occurred: {e}')

    return 'x'  # not found, unknown gender


# Function attributes used as a one-file cache (content and its origin).
gender_substring.lines = None
gender_substring.path = None
|
|
|
|
|
2024-06-23 19:49:35 +02:00
|
|
|
def get_active_path(file):
    """Return ``file`` joined onto the absolute directory of this script."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(script_dir, file)
|
|
|
|
|
2024-06-17 19:16:28 +02:00
|
|
|
def get_gender(firstnames, surname, call, verbose=0):
    """Guess the gender for a person from the ``.gender`` file.

    Only the first of several first names is looked up. Returns the marker
    found by ``gender_substring`` or ``'x'`` when the file is missing or
    the name is unknown. With ``verbose > 0`` every unknown name is
    reported together with a running counter.
    """
    genderfile = '.gender'
    gpath = get_active_path(genderfile)
    if not os.path.exists(gpath):
        return 'x'

    # Only the 1st first name is checked; matters with several first names.
    firstname = firstnames.partition(' ')[0]
    gender = gender_substring(gpath, firstname, verbose=0)

    if gender == 'x' and verbose > 0:
        get_gender.cnt += 1
        print(f'({get_gender.cnt}){call} "{firstname}" [{firstnames} {surname}] not found in file {genderfile} - gender "x" is set.')
    return gender


# Running number of names that could not be resolved (verbose output only).
get_gender.cnt = 0
|
|
|
|
|
|
|
|
def call_split_name(fullname, call, verbose):
|
|
|
|
|
|
|
|
assert(len(fullname) > 1)
|
|
|
|
|
|
|
|
name = fullname.split(' ', 1)
|
|
|
|
surname = name[0]
|
2024-06-19 22:45:21 +02:00
|
|
|
# several special cases like surname "de Lijezer", "van Dijk", "el Shamaa", "da Silva", etc.
|
2024-06-17 19:16:28 +02:00
|
|
|
match surname.lower():
|
2024-06-19 22:45:21 +02:00
|
|
|
case 'de' | 'el' | 'da':
|
2024-06-17 19:16:28 +02:00
|
|
|
name = fullname[3:].split(' ',1)
|
|
|
|
surname = surname.lower() + ' ' + name[0]
|
2024-06-19 19:51:08 +02:00
|
|
|
if verbose > 1:
|
2024-06-17 19:16:28 +02:00
|
|
|
print(f'## {fullname} --> {surname} ##')
|
|
|
|
|
|
|
|
case 'van' | 'von' :
|
|
|
|
name = fullname[4:].split(' ',1)
|
|
|
|
surname = surname.lower() + ' ' + name[0]
|
|
|
|
if surname.lower() in ['van der', 'von der', 'van den']: # e.g. "van der Meulen", "Walther von der Vogelweide", "Annie van den Berg"
|
|
|
|
name = fullname[8:].split(' ',1)
|
|
|
|
surname = surname.lower() + ' ' + name[0]
|
2024-06-19 19:51:08 +02:00
|
|
|
if verbose > 1:
|
2024-06-17 19:16:28 +02:00
|
|
|
print(f'## {fullname} --> {surname} ##')
|
|
|
|
case 'della' : # Ancient Italian noble family "della Rowere"
|
|
|
|
name = fullname[6:].split(' ',1)
|
|
|
|
surname = surname.lower() + ' ' + name[0]
|
2024-06-19 19:51:08 +02:00
|
|
|
if verbose > 1:
|
2024-06-17 19:16:28 +02:00
|
|
|
print(f'## {fullname} --> {surname} ##')
|
2024-06-20 03:27:45 +02:00
|
|
|
case 'senarclens' : # Senarclens de Grancy
|
|
|
|
if fullname.lower().startswith('senarclens de grancy'):
|
|
|
|
name = fullname[21:].split(' ',1)
|
|
|
|
surname = 'Senarclens de Grancy'
|
|
|
|
if verbose > 1:
|
|
|
|
print(f'## {fullname} --> {surname} ##')
|
2024-06-17 19:16:28 +02:00
|
|
|
|
|
|
|
if len(name) > 1:
|
2024-06-20 03:27:45 +02:00
|
|
|
firstname = name[1].lstrip() # FIX when there are more than 1 space b/w surname and firstname
|
2024-06-17 19:16:28 +02:00
|
|
|
else:
|
|
|
|
firstname = '<unknown>'
|
|
|
|
|
|
|
|
# In Austria the call suffix starting with Y is an YL (young lady)
|
|
|
|
# if call[3].upper() == 'Y':
|
|
|
|
if False:
|
|
|
|
gender = 'f'
|
|
|
|
else:
|
|
|
|
gender = get_gender(firstname, surname, call, verbose)
|
|
|
|
|
|
|
|
return firstname, surname, gender
|
|
|
|
|
|
|
|
def fix_typo(call, fullname, verbose=1):
    """Correct a known-wrong name for ``call`` using the ``.typo_callbook`` file.

    The file is read once from the directory of this script and cached in
    ``fix_typo.lines``.  The first four lines are skipped.  The first
    following line starting with ``*`` is a header with exactly four words;
    the column positions of its last three words are cached in
    ``fix_typo.spaces``.  Every later line whose columns 2..7 contain
    ``call`` is an instruction, selected by its first character:

    * ``#``       comment, only echoed at ``verbose > 1``
    * ``F``/``N`` replace the name when exactly one of first name / surname
                  differs from the corrected entry
    * ``X``       replace the name when first name and surname are swapped

    Args:
        call: call sign of the record being processed.
        fullname: name as extracted from the PDF ("Surname Firstname").
        verbose: verbosity level; ``> 0`` echoes the file and the fixes.

    Returns:
        The corrected full name, or ``fullname`` unchanged.

    NOTE(review): the block nesting below was reconstructed from a listing
    without indentation -- confirm against the repository.
    """
    fixtypofile = '.typo_callbook'
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), fixtypofile)
    if os.path.exists(path):
        try:
            # Load (and optionally echo) the file on first use only.
            if not fix_typo.lines:
                with open(path, 'r') as file:
                    fix_typo.lines = file.readlines()
                if verbose > 0:
                    print(f'File "{fixtypofile}":')
                    for line in fix_typo.lines:
                        print(f'>> {line.rstrip()}')
                    print('>> ** EOF **')

            for line in fix_typo.lines[4:]: # starting with line 4
                if len(fix_typo.spaces) == 0: # not initialized
                    # Still looking for the '*' header that defines the columns.
                    if line[0] == '*':
                        words = line.split()
                        assert len(words) == 4 # i.e. '*, call, surname, first name'
                        fix_typo.spaces = [line.index(words[1]), line.index(words[2]), line.index(words[3])]
                else:
                    # NOTE(review): substring test -- a short call such as
                    # 'OE1A' would also match a line for 'OE1AB'; confirm
                    # whether an exact comparison is intended.
                    if call in line[2:8]:
                        if verbose > 1:
                            print(f'Call: {call} found')
                        match line[0]:
                            case '#':
                                if verbose > 1:
                                    print(line.rstrip())
                            case 'F' | 'N':
                                if verbose > 0:
                                    print(line.rstrip())
                                    print(fullname)
                                firstname1, surname1, gender1 = call_split_name(fullname, call, 0)
                                # Corrected name: surname column + ' ' + first name
                                # column (without the trailing newline character).
                                fullname2 = line[fix_typo.spaces[1]:fix_typo.spaces[2]-1].rstrip() + ' ' + line[fix_typo.spaces[2]:-1]
                                if verbose > 0:
                                    print(fullname2)
                                firstname2, surname2, gender2 = call_split_name(fullname2, call, 0)
                                # Hardening: at a minimum, either the first names or the surnames must fit
                                fix_cnt = 0
                                if (firstname1 != firstname2):
                                    fix_cnt += 1
                                if (surname1 != surname2):
                                    fix_cnt += 1
                                # when the surname was split and written wrongly:
                                if line[0] == 'N' and firstname2 in fullname:
                                    if fix_cnt > 1: fix_cnt = 1
                                if fix_cnt == 0:
                                    # Nothing differs any more: the source data was corrected.
                                    print(f'It is fixed! You can remove the line with the item {call} from the file {fixtypofile}!')
                                elif fix_cnt > 1:
                                    print(f'Something went wrong, there are several bugs. Check line with call {call} in file {fixtypofile}!')
                                else:
                                    fullname = fullname2
                            case 'X': # exchange the surname with firstname
                                if verbose > 0:
                                    print(line.rstrip())
                                    print(fullname)
                                firstname1, surname1, gender1 = call_split_name(fullname, call, 0)
                                fullname2 = line[fix_typo.spaces[1]:fix_typo.spaces[2]-1].rstrip() + ' ' + line[fix_typo.spaces[2]:-1]
                                if verbose > 0:
                                    print(fullname2)
                                firstname2, surname2, gender2 = call_split_name(fullname2, call, 0)
                                fix_cnt = 0
                                if (firstname1 == firstname2) and (surname1 == surname2):
                                    print(f'It is fixed! You can remove the line with the item {call} from the file {fixtypofile}!')
                                elif (firstname1 != surname2) or (surname1 != firstname2):
                                    # Not a pure swap of the two parts: refuse to apply.
                                    print(f'Something went wrong, there are several bugs. Check line with call {call} in file {fixtypofile}!')
                                else:
                                    fullname = fullname2

        except FileNotFoundError:
            print(f'The file {path} was not found.')
        except Exception as e:
            print(f'An error occurred: {e}')

    return fullname


# Cached file content (None until first use) and header column positions.
fix_typo.lines = None
fix_typo.spaces = []
|
2024-06-16 17:00:46 +02:00
|
|
|
|
2024-06-19 19:51:08 +02:00
|
|
|
def call_data_record(line, mod_date, verbose, cur):
    """Parse one callbook table row and insert it into ``callbook_user``.

    Args:
        line: one stripped text line of the PDF table with the five columns
            call, name, location, address and permit class.
        mod_date: modification date of the PDF (currently not stored).
        verbose: verbosity level; ``> 1`` prints the parsed fields,
            ``> 2`` additionally prints the raw columns.
        cur: open database cursor used for the INSERT.

    Raises:
        AssertionError: if the line does not have five columns or the first
            column does not start with an Austrian call sign.

    Database errors are reported and swallowed so that the import goes on.
    """
    # Columns are separated by runs of at least four spaces.  Never split on
    # fewer: addresses may contain 2 or 3 consecutive spaces.  Several
    # records have an empty address or location, hence the fixed count check.
    records = re.split(r'[ ]{4,65}', line)

    if verbose > 2:
        print(f'Record length: {len(records)}')
        for m in records:
            print(m)

    # HARDENING:
    assert(len(records) == 5)
    call, fullname, location, address, permit_class = records

    # OE call: the previous check compared match.string (always the input
    # itself) with the input and crashed with AttributeError on no match.
    match = re.search(r'^(OE[0-9][A-Z]{1,3})', call)
    assert match is not None, f'Unexpected call sign: {call!r}'

    fullname = fix_typo(call, fullname, verbose)
    firstname = ''
    surname = ''

    if is_clubstation(call):
        # A name with an unbalanced quotation mark -- remove the first one
        # (only found at club stations).
        fullname = remove_first_quote_if_odd(fullname, verbose)
        clubstationfile = '.callbook_club'
        if verbose > 1:
            print(f'Call: {call}, Name: {fullname}')
        path = get_active_path(clubstationfile)
        if os.path.exists(path):
            fullname = replace_substring_with_line(path, fullname, verbose)
        gender = '*'
        firstname = fullname.strip()
    elif fullname[0] == '*':
        gender = '*'
    else:  # try to split the YL's or OM's name, guess the gender
        firstname, surname, gender = call_split_name(fullname, call, verbose)

    # NOTE(review): nesting reconstructed from a listing without indentation;
    # the location line is assumed to belong to the verbose output -- confirm.
    if verbose > 1:
        if gender == '*':
            print(f'Call: {call}, Name: {fullname}, Gender: {gender}')
        else:
            print(f'Call: {call}, First Name: {firstname}, Surname: {surname}, Gender: {gender}')
        print(f'Location: {location}, Address: {address}, Permit: {permit_class}')

    created = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    call_data_record.cnt += 1  # increment the user_id
    user_id = call_data_record.cnt

    # Parameterised statement: values are never formatted into the SQL text.
    statement = "INSERT INTO `callbook_user`(`user_id`,`call`,`firstname`,`surname`,`gender`,`created`,`modified`) VALUES (%s, %s, %s, %s, %s, %s, %s)"
    data = (user_id, call, firstname, surname, gender, created, created)

    try:
        cur.execute(statement, data)
    except mariadb.Error as e:
        print(f'\n[WARN] MySQLError during execute statement\n\tArgs: {e.args}')
    except Exception as e:
        print('Error: {}'.format(e), file=sys.stderr)


# Running record counter, used as user_id of the inserted rows.
call_data_record.cnt = 0
|
|
|
|
|
|
|
|
def call_analyse_pdf(file, verbose, cur):
    """Extract every table row of the callbook PDF and store it.

    Args:
        file: path of the PDF file.
        verbose: verbosity level; any non-zero value prints the document
            information, ``>= 2`` also prints every extracted line.
        cur: open database cursor handed on to ``call_data_record``.
    """
    reader = PdfReader(file)
    # A PDF without a document information dictionary yields None here.
    meta = reader.metadata

    if verbose:
        print(verbose)
        print(' Pages:', len(reader.pages))
        if meta is not None:
            # All of the following could be None!
            print(f' Author: {meta.author}')
            print(f' Creator: {meta.creator}')
            print(f'Producer: {meta.producer}')
            print(f' Subject: {meta.subject}')
            print(f' Title: {meta.title}')
            print(f' Created: {meta.creation_date}')
            print(f'Modified: {meta.modification_date}')

    # Looked up once instead of once per line; None without metadata.
    mod_date = meta.modification_date if meta is not None else None

    for page in reader.pages:
        page_text = page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False)
        lines = page_text.strip().splitlines()
        # The first three and the last two lines of a page are not data rows.
        for line in lines[3:-2]:
            line = line.strip()
            if verbose >= 2:
                print(line)
            call_data_record(line, mod_date, verbose, cur)
|
|
|
|
|
|
|
|
def exec_sql_file(cursor, sql_file):
    """Execute the SQL statements of a script file one by one.

    Lines starting with ``--`` are ignored.  All other lines are collected
    until a line ends with ``;``; the collected text is then executed as one
    statement.  A failing statement is reported and the next one is tried.

    Args:
        cursor: database cursor providing ``execute(statement)``.
        sql_file: path of the SQL script.
    """
    statement = ''
    try:
        # 'with' closes the file again, also when execution is aborted.
        with open(sql_file) as script:
            for line in script:
                stripped = line.strip()
                if stripped.startswith('--'):  # ignore sql comment lines
                    continue
                # keep appending lines that don't end in ';'
                statement += line
                if stripped.endswith(';'):
                    try:
                        cursor.execute(statement)
                    except mariadb.Error as e:
                        print(f'\n[WARN] MySQLError during execute statement\n\tArgs: {e.args}')
                    statement = ''
    except FileNotFoundError:
        # Report the file that was actually requested (this message used to
        # reference a name that is not defined inside this function).
        print(f'The file {sql_file} was not found.')
    except Exception as e:
        print('Error: {}'.format(e), file=sys.stderr)
|
2024-06-16 07:38:30 +02:00
|
|
|
|
2024-06-13 03:03:39 +02:00
|
|
|
if __name__ == '__main__':
    # Script entry: parse options, obtain the PDF, then import it into the
    # MariaDB database described by config.ini.
    args = call_parser()
    config = call_configparser()

    try:
        filename = call_website(**vars(args))
        if args.verbose > 1:
            print(f'Filename: {filename}')
    except Exception as e:
        print('Error: {}'.format(e), file=sys.stderr)
        sys.exit(1)

    # Every key below has a fallback, so an existing config.ini without a
    # [MariaDB] section must not abort the program with a KeyError.
    if not config.has_section('MariaDB'):
        config.add_section('MariaDB')
    configdb = config['MariaDB']  # fetch from config file, then do with fallbacks:

    try:
        conn = mariadb.connect(
            user = configdb.get('user', 'om'),
            password = configdb.get('password', 'oe3tkt'),
            host = configdb.get('host', '127.0.0.1'),
            port = configdb.getint('port', 3306),
            database = configdb.get('database', 'callbook')
        )
    except mariadb.Error as e:
        print(f'Error connectiong to MariaDB platform: {e}')
        sys.exit(5)

    print(f'Connected to MariaDB API Version {mariadb.mariadbapi_version}.')
    print(datetime.datetime.now(datetime.UTC))

    # try/finally: cursor and connection are released even when the import
    # fails half-way; nothing is committed in that case.
    try:
        cur = conn.cursor()
        try:
            cur.execute('SELECT VERSION()')
            print(cur.fetchone()[0])

            sql_file = '.sql_init'
            path = os.path.join(os.path.dirname(os.path.abspath(__file__)), sql_file)
            exec_sql_file(cur, path)

            call_analyse_pdf(filename, args.verbose, cur)
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()

    sys.exit(0)
|