From ad1a7c4507ed43170f813f386eff695382f56387 Mon Sep 17 00:00:00 2001 From: Thomas Kuschel Date: Sun, 16 Jun 2024 17:00:46 +0200 Subject: [PATCH] ADD .callbook_club file to expand clubstation names --- afu/.callbook_club | 14 ++++ afu/README.md | 9 +++ afu/callbook.py | 180 +++++++++++++++++++++++++++++++++------------ 3 files changed, 158 insertions(+), 45 deletions(-) create mode 100644 afu/.callbook_club diff --git a/afu/.callbook_club b/afu/.callbook_club new file mode 100644 index 0000000..13ae058 --- /dev/null +++ b/afu/.callbook_club @@ -0,0 +1,14 @@ +Landesverband Wien im Österreichischen Versuchssenderverband +Amateurfunkverein des Österreichischen Bundesheeres - Austrian Military Radio Society +Radio-Amateur-Klub der Technischen Universität Wien - Radio Amateur Club of the TU Wien +Landesverband Tirol des Österreichischen Versuchssenderverbands +Österreichisches Rotes Kreuz, Landesverband Vorarlberg +Kulturverein der österreichischen Eisenbahner - Sektion Amateurfunk +Höhere Technische Bundeslehr- und Versuchsanstalt Innsbruck Anichstraße +Österreichisches Rotes Kreuz, Landesverband Tirol +Johanniter Tirol Rettungs- und Einsatzdienste mildtätige GmbH +Österreichischer Versuchssenderverband - Dachverband +Landesverband Niederösterreich des Österreichischen Versuchssenderverbands +Amateurfunkverband Salzburg - Landesverband des Österreichischen Versuchssenderverbandes +"OAFV" des ÖVSV, Ortsgruppe Ried - Grieskirchen +OÖ Amateurfunkverband, Ortsgruppe Ried-Grieskirchen diff --git a/afu/README.md b/afu/README.md index 84b266e..d383286 100644 --- a/afu/README.md +++ b/afu/README.md @@ -220,3 +220,12 @@ Clone the repository `script` to your site with: ~/gitea$ git clone ssh://git@kuschel.at:21861/public/scripts.git + +## Connecting to MariaDB database + +## Install python-mariadb + + $ yay -S python-mysql-connector + +Hint: At the moment the compilation fails. Will be updated soon. + See https://jira.mariadb.org/projects/CONPY/issues/CONPY-284 \ No newline at end of file diff --git a/afu/callbook.py b/afu/callbook.py index e15c56e..e6ba3f1 100755 --- a/afu/callbook.py +++ b/afu/callbook.py @@ -28,14 +28,26 @@ def call_parser(): Version {__version__} ''' ) - parser.add_argument('--interactive', '-i', action='store_true', default=False) - # parser.add_argument('--server', '-s', default=__website__, required=False) - parser.add_argument('--version', '-v', action='version', version='{} {}'.format(os.path.split(__file__)[1],__version__)) + parser.add_argument('-i', '--interactive', action='store_true', default=False) + # parser.add_argument('-s', '--server' default=__website__, required=False) + parser.add_argument('-V', '--version', action='version', version='{} {}'.format(os.path.split(__file__)[1],__version__)) + parser.add_argument('-v', '--verbose', action='append_const', const = 1) + parser.add_argument('-p', '--path', default='Rufzeichenliste_AT_Stand_010624.pdf', help= 'skip the download if the specified path to a PDF file exists') parser.add_argument('url', metavar='URL', nargs='?', default=__website__) - return parser.parse_args() + opt = parser.parse_args() + opt.verbose = 0 if opt.verbose is None else sum(opt.verbose) + return opt + +def call_website(url,verbose,path='',interactive=False): + + if path: + if os.path.exists(path): + return path + else: + print(f'The given path "{path}" does not exist.') + sys.exit(3) -def call_website(url,interactive=False): if(interactive): print('Interactive') driver=webdriver.Chrome() @@ -71,61 +83,139 @@ def call_website(url,interactive=False): driver.close() return os.path.basename(href) -def get_pdf_content_lines(pdf_file_path): - with open(pdf_file_path) as f: - pdf_reader = PdfReader(f) - for page in pdf_reader.pages: - for line in page.extractText().spitlines(): - yield line +def remove_first_quote_if_odd(text, verbose = 0): + double_quote_cnt = text.count('"') + # single_quote_cnt = text.count("'") -def call_analyse_pdf(file): + if (double_quote_cnt % 2 != 0): # or (single_quote_cnt % 2 != 0): + # Find and remove the first quote + for i, char in enumerate(text): + if char in ['"']: # ['"', "'"]: + if (verbose > 0): + print(text) + text = text[:i] + text[i+1:] + if (verbose > 0): + print(text) + break + return text + +def is_clubstation(call): + assert(len(call) > 3) + if call[3].upper() == 'X': + return True + return False + +def replace_substring_with_line(path, search_substring, verbose=0): + + try: + with open(path, 'r') as file: + lines = file.readlines() + + search_substring + for line in lines: + if search_substring[0:40].lower() in line.lower(): + modified_line = line.strip() + # Replace the substring with the whole line + ## line = line.lower().replace(search_substring.lower(), modified_line) + modified_line = line + return modified_line + + except FileNotFoundError: + print(f'The file {path} was not found.') + except Exception as e: + print(f'An error occurred: {e}') + + return search_substring + + +def call_data_record(line, mod_date, verbose): + + # we have to split the record with a cost-intensive regular expression + # record = re.split('OE[0-9][A-Z]{1,3}[ \t]{3,20}',line) # this does not work 100% + # record = re.findall(r'(OE[0-9][A-Z]{1,3})[ \t]{2,12}([A-ZÄÖÜ].+[ ]?.*[ ]?.*[ ]?.*)[ \t]{3,30}(.{3,30})[ \t]{3,30}([1,3,4])', line) + # record = re.search(r'(OE[0-9][A-Z]{1,3})[ ]{2,12}([. ]+)[ ]{3,50}([. ]+)[ ]{3,50}([1-4])', line) + # record = re.search(r'^(OE[0-9][A-Z]{1,3})[ \t]{2,20}([\w ]{1,12})[ ]{3,50}(.*)([1-4]{1})$', line) + + # Never split Addresses containing 2 or 3 spaces, also several records contain no address or no location + records = re.split(r'[ ]{4,65}', line) + # [records for record in records] + + if verbose > 2 : + print(f'Record length: {len(records)}') + + for m in records: + print(m) + + # HARDENING: + assert(len(records) == 5) + # OE Call: + call = records[0] + match = re.search(r'^(OE[0-9][A-Z]{1,3})', call) + assert(match.string == call) + fullname = records[1] + # If there is a clubstation + if is_clubstation(call): + # Name starting with only one quotation marks e.g. " -- remove that one: + fullname = remove_first_quote_if_odd(fullname, verbose) # only found @ clubstations + clubstationfile = '.callbook_club' + if verbose > 0: + print(f'Call: {call}, Name: {fullname}') + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), clubstationfile) + if os.path.exists(path): + fullname = replace_substring_with_line(path, fullname, verbose) + if verbose > 0: + print(f'Call: {call}, Name: {fullname}') + + + +# if not record: +# return +# if verbose == 1: +# print(record.group(1)) +# if verbose >= 3: +# print(f'Call: {record.group(1)}') +# print(f'Name: {record.group(2)}') +# #print(f'Location: {record[3]}') +# #print(f'Address: {record[4]}') +# #print(f'Permit Class: {record[5]}') + +def call_analyse_pdf(file, verbose): # Define a regular expression to match tables reader = PdfReader(file) meta = reader.metadata - print(' Pages:', len(reader.pages)) - # All of the following could be None! - print(f' Author: {meta.author}') - print(f' Creator: {meta.creator}') - print(f'Producer: {meta.producer}') - print(f' Subject: {meta.subject}') - print(f' Title: {meta.title}') - print(f' Created: {meta.creation_date}') - print(f'Modified: {meta.modification_date}') + if verbose: + print(verbose) + print(' Pages:', len(reader.pages)) + # All of the following could be None! + print(f' Author: {meta.author}') + print(f' Creator: {meta.creator}') + print(f'Producer: {meta.producer}') + print(f' Subject: {meta.subject}') + print(f' Title: {meta.title}') + print(f' Created: {meta.creation_date}') + print(f'Modified: {meta.modification_date}') + for page in reader.pages: page_text = page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False) - #print(page_text) - # page_text = page.extract_text(extraction_mode="layout", layout_mode_scale_weight=1.0) - # print(page_text) - # Find all tables in page_text - calls = re.findall(r' +(OE[0-9][A-Z]{1,3}).*$', page_text) - for call in calls: - print(call) - tables = re.findall(r'(?s)\b(?:\w+\s+){2,}\w+\b(?:\s*[,;]\s*\b(?:\w+\s+){2,}\w+\b)*', page_text) - # Loop through each table and create a pandas DataFrame - for table in tables: - # Split the table into rows - rows = table.strip().split('\n') - # Split the rows into cells - cells = [row.split('|') for row in rows] - # Remove leading and trailing whitespace from cells - cells = [[cell.strip() for cell in row] for row in cells] - # Remove empty rows and columns - cells = [[cell for cell in row if cell] for row in cells if row] - # Create a pandas DataFrame from the cells - df = pd.DataFrame(cells[1:], columns=cells[0]) - - # TODO: Clean and manipulate the df as needed - + lines = page_text.strip().splitlines() + for line in lines[3:-2]: + line = line.strip() + # calls = re.findall(r' +(OE[0-9][A-Z]{1,3}).*$', page_text) + if verbose >= 2: + print(line) + call_data_record(line, meta.modification_date,verbose) if __name__ == '__main__': # call_description() args = call_parser() + # filename = 'Rufzeichenliste_AT_Stand_010624.pdf' try: filename = call_website(**vars(args)) + print(f'Filename: {filename}') - call_analyse_pdf(filename) + call_analyse_pdf(filename,args.verbose) sys.exit(0) except Exception as e: print('Error: {}'.format(e), file=sys.stderr)