ADD .callbook_club file to expand clubstation names
This commit is contained in:
parent
23bd746108
commit
ad1a7c4507
14
afu/.callbook_club
Normal file
14
afu/.callbook_club
Normal file
@ -0,0 +1,14 @@
|
||||
Landesverband Wien im Österreichischen Versuchssenderverband
|
||||
Amateurfunkverein des Österreichischen Bundesheeres - Austrian Military Radio Society
|
||||
Radio-Amateur-Klub der Technischen Universität Wien - Radio Amateur Club of the TU Wien
|
||||
Landesverband Tirol des Österreichischen Versuchssenderverbands
|
||||
Österreichisches Rotes Kreuz, Landesverband Vorarlberg
|
||||
Kulturverein der österreichischen Eisenbahner - Sektion Amateurfunk
|
||||
Höhere Technische Bundeslehr- und Versuchsanstalt Innsbruck Anichstraße
|
||||
Österreichisches Rotes Kreuz, Landesverband Tirol
|
||||
Johanniter Tirol Rettungs- und Einsatzdienste mildtätige GmbH
|
||||
Österreichischer Versuchssenderverband - Dachverband
|
||||
Landesverband Niederösterreich des Österreichischen Versuchssenderverbands
|
||||
Amateurfunkverband Salzburg - Landesverband des Österreichischen Versuchssenderverbandes
|
||||
"OAFV" des ÖVSV, Ortsgruppe Ried - Grieskirchen
|
||||
OÖ Amateurfunkverband, Ortsgruppe Ried-Grieskirchen
|
@ -220,3 +220,12 @@ Clone the repository `script` to your site with:
|
||||
|
||||
~/gitea$ git clone ssh://git@kuschel.at:21861/public/scripts.git
|
||||
|
||||
|
||||
## Connecting to MariaDB database
|
||||
|
||||
## Install python-mariadb
|
||||
|
||||
$ yay -S python-mysql-connector
|
||||
|
||||
Hint: At the moment the compilation of python-mariadb fails. This section will be updated as soon as the issue is resolved.
|
||||
See https://jira.mariadb.org/projects/CONPY/issues/CONPY-284
|
180
afu/callbook.py
180
afu/callbook.py
@ -28,14 +28,26 @@ def call_parser():
|
||||
Version {__version__}
|
||||
'''
|
||||
)
|
||||
parser.add_argument('--interactive', '-i', action='store_true', default=False)
|
||||
# parser.add_argument('--server', '-s', default=__website__, required=False)
|
||||
parser.add_argument('--version', '-v', action='version', version='{} {}'.format(os.path.split(__file__)[1],__version__))
|
||||
parser.add_argument('-i', '--interactive', action='store_true', default=False)
|
||||
# parser.add_argument('-s', '--server' default=__website__, required=False)
|
||||
parser.add_argument('-V', '--version', action='version', version='{} {}'.format(os.path.split(__file__)[1],__version__))
|
||||
parser.add_argument('-v', '--verbose', action='append_const', const = 1)
|
||||
parser.add_argument('-p', '--path', default='Rufzeichenliste_AT_Stand_010624.pdf', help= 'skip the download if the specified path to a PDF file exists')
|
||||
parser.add_argument('url', metavar='URL', nargs='?', default=__website__)
|
||||
|
||||
return parser.parse_args()
|
||||
opt = parser.parse_args()
|
||||
opt.verbose = 0 if opt.verbose is None else sum(opt.verbose)
|
||||
return opt
|
||||
|
||||
def call_website(url,verbose,path='',interactive=False):
|
||||
|
||||
if path:
|
||||
if os.path.exists(path):
|
||||
return path
|
||||
else:
|
||||
print(f'The given path "{path}" does not exist.')
|
||||
sys.exit(3)
|
||||
|
||||
def call_website(url,interactive=False):
|
||||
if(interactive):
|
||||
print('Interactive')
|
||||
driver=webdriver.Chrome()
|
||||
@ -71,61 +83,139 @@ def call_website(url,interactive=False):
|
||||
driver.close()
|
||||
return os.path.basename(href)
|
||||
|
||||
def get_pdf_content_lines(pdf_file_path):
|
||||
with open(pdf_file_path) as f:
|
||||
pdf_reader = PdfReader(f)
|
||||
for page in pdf_reader.pages:
|
||||
for line in page.extractText().spitlines():
|
||||
yield line
|
||||
def remove_first_quote_if_odd(text, verbose=0):
    """Drop the first double quote when *text* holds an odd number of them.

    Text with an even number of double quotes (including none) is returned
    unchanged. Single quotes are ignored.

    Args:
        text: The string to inspect.
        verbose: When greater than 0, print the text before and after the
            quote is removed.

    Returns:
        The text without its first double quote if the quote count was odd,
        otherwise the text as given.
    """
    if text.count('"') % 2 == 0:
        return text

    # An odd count means one quote has no partner; removing the first one
    # leaves the remaining quotes balanced.
    balanced = text.replace('"', '', 1)
    if verbose > 0:
        print(text)
        print(balanced)
    return balanced
|
||||
|
||||
def is_clubstation(call):
    """Return True when the fourth character of *call* is an 'X' (any case).

    Calls shorter than four characters cannot carry that marker and yield
    False instead of raising, so the check behaves the same with and without
    ``python -O`` (which strips ``assert`` statements).

    Args:
        call: The call sign string to test.

    Returns:
        True if ``call[3]`` is 'X' or 'x', otherwise False.
    """
    return len(call) > 3 and call[3].upper() == 'X'
|
||||
|
||||
def replace_substring_with_line(path, search_substring, verbose=0):
    """Expand *search_substring* to the full line of *path* that contains it.

    Only the first 40 characters of *search_substring* are compared, and the
    comparison ignores case. The first matching line wins and is returned
    without surrounding whitespace (in particular without its newline).

    Args:
        path: Path of the text file to search.
        search_substring: The (possibly truncated) text to look up.
        verbose: Accepted for interface compatibility; not used here.

    Returns:
        The stripped matching line, or *search_substring* unchanged when it
        is empty, nothing matches, or the file cannot be read.
    """
    # Lower-case the search prefix once instead of once per line.
    needle = search_substring[:40].lower()
    if not needle:
        # An empty needle is "contained" in every line and would otherwise
        # return the first line of the file.
        return search_substring

    try:
        # Explicit encoding: the file holds non-ASCII names, so do not
        # depend on the locale's default encoding.
        with open(path, 'r', encoding='utf-8') as file:
            for line in file:
                if needle in line.lower():
                    return line.strip()
    except FileNotFoundError:
        print(f'The file {path} was not found.')
    except (OSError, UnicodeDecodeError) as e:
        print(f'An error occurred: {e}')

    return search_substring
|
||||
|
||||
|
||||
def call_data_record(line, mod_date, verbose):
    """Split one callbook line into fields and resolve the station name.

    The line is split on runs of 4 to 65 spaces and must yield exactly five
    fields. The first field is the call sign and the second the name. For
    club stations the name is cleaned of an unbalanced leading quote and, if
    a ``.callbook_club`` file exists next to this module, expanded to the
    full line found there.

    Args:
        line: One stripped text line of the callbook table.
        mod_date: Accepted for interface compatibility; not used here.
        verbose: Verbosity level; > 0 prints call and name of club stations,
            > 2 additionally prints the split fields.

    Raises:
        ValueError: If the line does not split into five fields or the first
            field does not start with an OE call sign.
    """
    # Addresses may contain runs of 2 or 3 spaces, so only 4 or more spaces
    # count as a column separator. Several records have no address or no
    # location.
    records = re.split(r'[ ]{4,65}', line)

    if verbose > 2:
        print(f'Record length: {len(records)}')
        for field in records:
            print(field)

    # Hardening: raise explicitly instead of using assert, which is stripped
    # under ``python -O``.
    if len(records) != 5:
        raise ValueError(f'Expected 5 fields but found {len(records)}: {line!r}')

    # OE call: the former check compared match.string with the call, which is
    # always true for a match and crashed with AttributeError without one.
    # As before, only the start of the field is validated.
    call = records[0]
    if re.match(r'OE[0-9][A-Z]{1,3}', call) is None:
        raise ValueError(f'Not a valid OE call sign: {call!r}')

    fullname = records[1]
    if is_clubstation(call):
        # A name starting with a single, unpaired quotation mark was only
        # found at club stations; remove that one quote.
        fullname = remove_first_quote_if_odd(fullname, verbose)
        if verbose > 0:
            print(f'Call: {call}, Name: {fullname}')

        clubstationfile = '.callbook_club'
        path = os.path.join(os.path.dirname(os.path.abspath(__file__)), clubstationfile)
        if os.path.exists(path):
            fullname = replace_substring_with_line(path, fullname, verbose)
            if verbose > 0:
                print(f'Call: {call}, Name: {fullname}')
|
||||
|
||||
|
||||
|
||||
# if not record:
|
||||
# return
|
||||
# if verbose == 1:
|
||||
# print(record.group(1))
|
||||
# if verbose >= 3:
|
||||
# print(f'Call: {record.group(1)}')
|
||||
# print(f'Name: {record.group(2)}')
|
||||
# #print(f'Location: {record[3]}')
|
||||
# #print(f'Address: {record[4]}')
|
||||
# #print(f'Permit Class: {record[5]}')
|
||||
|
||||
def call_analyse_pdf(file, verbose):
|
||||
|
||||
# Define a regular expression to match tables
|
||||
|
||||
reader = PdfReader(file)
|
||||
meta = reader.metadata
|
||||
print(' Pages:', len(reader.pages))
|
||||
# All of the following could be None!
|
||||
print(f' Author: {meta.author}')
|
||||
print(f' Creator: {meta.creator}')
|
||||
print(f'Producer: {meta.producer}')
|
||||
print(f' Subject: {meta.subject}')
|
||||
print(f' Title: {meta.title}')
|
||||
print(f' Created: {meta.creation_date}')
|
||||
print(f'Modified: {meta.modification_date}')
|
||||
if verbose:
|
||||
print(verbose)
|
||||
print(' Pages:', len(reader.pages))
|
||||
# All of the following could be None!
|
||||
print(f' Author: {meta.author}')
|
||||
print(f' Creator: {meta.creator}')
|
||||
print(f'Producer: {meta.producer}')
|
||||
print(f' Subject: {meta.subject}')
|
||||
print(f' Title: {meta.title}')
|
||||
print(f' Created: {meta.creation_date}')
|
||||
print(f'Modified: {meta.modification_date}')
|
||||
|
||||
for page in reader.pages:
|
||||
page_text = page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False)
|
||||
#print(page_text)
|
||||
# page_text = page.extract_text(extraction_mode="layout", layout_mode_scale_weight=1.0)
|
||||
# print(page_text)
|
||||
# Find all tables in page_text
|
||||
calls = re.findall(r' +(OE[0-9][A-Z]{1,3}).*$', page_text)
|
||||
for call in calls:
|
||||
print(call)
|
||||
tables = re.findall(r'(?s)\b(?:\w+\s+){2,}\w+\b(?:\s*[,;]\s*\b(?:\w+\s+){2,}\w+\b)*', page_text)
|
||||
# Loop through each table and create a pandas DataFrame
|
||||
for table in tables:
|
||||
# Split the table into rows
|
||||
rows = table.strip().split('\n')
|
||||
# Split the rows into cells
|
||||
cells = [row.split('|') for row in rows]
|
||||
# Remove leading and trailing whitespace from cells
|
||||
cells = [[cell.strip() for cell in row] for row in cells]
|
||||
# Remove empty rows and columns
|
||||
cells = [[cell for cell in row if cell] for row in cells if row]
|
||||
# Create a pandas DataFrame from the cells
|
||||
df = pd.DataFrame(cells[1:], columns=cells[0])
|
||||
|
||||
# TODO: Clean and manipulate the df as needed
|
||||
|
||||
lines = page_text.strip().splitlines()
|
||||
for line in lines[3:-2]:
|
||||
line = line.strip()
|
||||
# calls = re.findall(r' +(OE[0-9][A-Z]{1,3}).*$', page_text)
|
||||
if verbose >= 2:
|
||||
print(line)
|
||||
call_data_record(line, meta.modification_date,verbose)
|
||||
|
||||
if __name__ == '__main__':
|
||||
# call_description()
|
||||
args = call_parser()
|
||||
# filename = 'Rufzeichenliste_AT_Stand_010624.pdf'
|
||||
try:
|
||||
filename = call_website(**vars(args))
|
||||
|
||||
print(f'Filename: {filename}')
|
||||
call_analyse_pdf(filename)
|
||||
call_analyse_pdf(filename,args.verbose)
|
||||
sys.exit(0)
|
||||
except Exception as e:
|
||||
print('Error: {}'.format(e), file=sys.stderr)
|
||||
|
Loading…
Reference in New Issue
Block a user