diff --git a/scripts/add_new_contribution_to_yaml.py b/scripts/add_new_contribution_to_yaml.py index 3ed4832..18812a7 100644 --- a/scripts/add_new_contribution_to_yaml.py +++ b/scripts/add_new_contribution_to_yaml.py @@ -1,7 +1,7 @@ """ given properties, add a new contribution to the contributions.yaml database file. """ -from datetime import datetime +from datetime import datetime, UTC import json import pathlib from sys import argv @@ -39,7 +39,7 @@ # append new contribution with next index # add status, at top - datetime_today = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S%z') + datetime_today = datetime.now(UTC).strftime('%Y-%m-%dT%H:%M:%S%z') contribution = { 'id': max_index + 1, 'status': 'VALID', diff --git a/scripts/fetch_updates.py b/scripts/fetch_updates.py index 709448e..971c346 100644 --- a/scripts/fetch_updates.py +++ b/scripts/fetch_updates.py @@ -2,15 +2,16 @@ Reads in the contributions.yaml file, and updates the entries by hitting the 'source' url. """ import argparse -from datetime import datetime +from datetime import datetime, UTC import pathlib from ruamel.yaml import YAML +from multiprocessing import Pool from parse_and_validate_properties_txt import read_properties_txt, parse_text, validate_existing def update_contribution(contribution, props): - datetime_today = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S%z') + datetime_today = datetime.now(UTC).strftime('%Y-%m-%dT%H:%M:%S%z') contribution['lastUpdated'] = datetime_today if 'previousVersions' not in contribution: contribution['previousVersions'] = [] @@ -29,6 +30,7 @@ def update_contribution(contribution, props): if 'download' not in contribution: contribution['download'] = contribution['source'][:contribution['source'].rfind('.')] + '.zip' + def log_broken(contribution, msg): if contribution['status'] == 'VALID': @@ -37,8 +39,10 @@ def log_broken(contribution, msg): contribution['log'] = [] contribution['log'].append(msg) -def process_contribution(contribution): - date_today = datetime.utcnow().strftime('%Y-%m-%d') +def process_contribution(item): + index, contribution = item + + date_today = datetime.now(UTC).strftime('%Y-%m-%d') this_version = '0' if contribution['status'] != 'DEPRECATED': @@ -51,16 +55,16 @@ def process_contribution(contribution): properties_raw = read_properties_txt(contribution['source']) except FileNotFoundError as e: log_broken(contribution, f'file not found, {e}, {date_today}') - return + return index, contribution except Exception: log_broken(contribution, f'url timeout, {date_today}') - return + return index, contribution try: props = validate_existing(parse_text(properties_raw)) except Exception: log_broken(contribution, f'invalid file, {date_today}') - return + return index, contribution # some library files have field lastUpdated. This also exists in the database, but is defined # by our scripts, so remove this field. @@ -71,6 +75,7 @@ def process_contribution(contribution): if props['version'] != this_version: # update from online update_contribution(contribution, props) + return index, contribution if __name__ == "__main__": @@ -92,14 +97,22 @@ def process_contribution(contribution): contributions_list = data['contributions'] if index == 'all': - # update all contributions - for contribution in contributions_list: - process_contribution(contribution) + total = len(contributions_list) + completed = 0 + print(f"Starting processing of {total} contributions...") + + with Pool(processes=256) as pool: + for index, contribution in pool.imap_unordered(process_contribution, enumerate(contributions_list)): + contributions_list[index] = contribution + completed += 1 + print(f"Progress: {completed}/{total} ({(completed/total*100):.1f}%)") + + print("All processing complete") else: # update only contribution with id==index contribution = next((x for x in contributions_list if x['id'] == int(index)), None) print(contribution) - process_contribution(contribution) + process_contribution((index, contribution)) print(contribution) # write all contributions to database file diff --git a/scripts/parse_and_validate_properties_txt.py b/scripts/parse_and_validate_properties_txt.py index d4eed27..d439bb3 100644 --- a/scripts/parse_and_validate_properties_txt.py +++ b/scripts/parse_and_validate_properties_txt.py @@ -64,7 +64,7 @@ def read_properties_txt(properties_url): 'User-Agent': 'Mozilla/5.0', 'Accept': 'text/html', } - r = requests.get(properties_url, headers=headers) + r = requests.get(properties_url, headers=headers, timeout=30) if r.status_code != 200: raise FileNotFoundError(f"status code {r.status_code} returned for url {r.url}")