diff --git a/dev/create_gcs_query_plan.py b/dev/create_gcs_query_plan.py
index f8861d0b..67c2467c 100755
--- a/dev/create_gcs_query_plan.py
+++ b/dev/create_gcs_query_plan.py
@@ -117,7 +117,7 @@ def get_tool_urls():
     file_path = shared.path_join(PATHS["data"], "legal-tool-paths.txt")
     prefix = "//creativecommons.org/"
     tool_urls = []
-    with open(file_path, "r") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         for line in file_obj:
             tool_urls.append(f"{prefix}{line.strip()}")
     LOGGER.info("Prioritizing CC Legal Tool URLs")
@@ -127,14 +127,14 @@ def get_tool_urls():
 
 def load_countries():
     file_path = shared.path_join(PATHS["data"], "gcs_country_collection.yaml")
-    with open(file_path, "r") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         countries = yaml.safe_load(file_obj)
     return countries
 
 
 def load_languages():
     file_path = shared.path_join(PATHS["data"], "gcs_language_collection.yaml")
-    with open(file_path, "r") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         languages = yaml.safe_load(file_obj)
     return languages
 
@@ -209,7 +209,7 @@ def save_plan(plan):
         "LANGUAGE",
         "LR",
     ]
-    with open(file_path, "w") as file_obj:
+    with open(file_path, "w", encoding="utf-8", newline="\n") as file_obj:
         writer = csv.DictWriter(
             file_obj, fieldnames=fieldnames, dialect="unix"
         )
diff --git a/dev/prioritize_tools.py b/dev/prioritize_tools.py
index 5a04e4c7..e89c2330 100755
--- a/dev/prioritize_tools.py
+++ b/dev/prioritize_tools.py
@@ -42,7 +42,7 @@ def get_tool_urls():
     file_path = shared.path_join(PATHS["data"], "legal-tool-paths.txt")
     prefix = "//creativecommons.org/"
     tool_urls = []
-    with open(file_path, "r") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         for line in file_obj:
             tool_urls.append(f"{prefix}{line.strip()}")
     return tool_urls
@@ -112,7 +112,7 @@ def save_tools_list(tool_urls):
     LOGGER.info("Saving prioritized CC Legal Tool URLs")
     file_path = shared.path_join(PATHS["data"], "prioritized-tool-urls.txt")
     tool_urls.append("")  # ensure file has end of file newline
-    with open(file_path, "w") as file_obj:
+    with open(file_path, "w", encoding="utf-8", newline="\n") as file_obj:
         file_obj.writelines("\n".join(tool_urls))
 
 
diff --git a/scripts/1-fetch/gcs_fetch.py b/scripts/1-fetch/gcs_fetch.py
index 9f2b781e..67bc0164 100755
--- a/scripts/1-fetch/gcs_fetch.py
+++ b/scripts/1-fetch/gcs_fetch.py
@@ -104,7 +104,7 @@ def get_search_service():
 
 def initialize_data_file(file_path, header):
     if not os.path.isfile(file_path):
-        with open(file_path, "w", newline="") as file_obj:
+        with open(file_path, "w", encoding="utf-8", newline="\n") as file_obj:
             writer = csv.DictWriter(
                 file_obj, fieldnames=header, dialect="unix"
             )
@@ -127,7 +127,7 @@ def get_last_completed_plan_index():
     last_completed_plan_index = 0
     for file_path in [FILE1_COUNT, FILE2_LANGUAGE, FILE3_COUNTRY]:
         try:
-            with open(file_path, "r", newline="") as file_obj:
+            with open(file_path, "r", encoding="utf-8") as file_obj:
                 reader = csv.DictReader(file_obj, dialect="unix")
                 for row in reader:
                     pass  # skip through to last row
@@ -147,7 +147,7 @@ def get_last_completed_plan_index():
 def load_plan():
     plan = []
     file_path = shared.path_join(PATHS["data"], "gcs_query_plan.csv")
-    with open(file_path, "r", newline="") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         plan = list(csv.DictReader(file_obj, dialect="unix"))
     return plan
 
@@ -181,7 +181,7 @@ def append_data(args, plan_row, index, count):
         "TOOL_IDENTIFIER": plan_row["TOOL_IDENTIFIER"],
         "COUNT": count,
     }
-    with open(file_path, "a", newline="") as file_obj:
+    with open(file_path, "a", encoding="utf-8", newline="\n") as file_obj:
         writer = csv.DictWriter(
             file_obj, fieldnames=fieldnames, dialect="unix"
         )
diff --git a/scripts/1-fetch/github_fetch.py b/scripts/1-fetch/github_fetch.py
index 0d68f715..50692377 100755
--- a/scripts/1-fetch/github_fetch.py
+++ b/scripts/1-fetch/github_fetch.py
@@ -110,7 +110,7 @@ def write_data(args, tool_data):
         LOGGER.error("Unable to fetch all records. Aborting.")
         return args
 
-    with open(FILE1_COUNT, "w", newline="") as file_obj:
+    with open(FILE1_COUNT, "w", encoding="utf-8", newline="\n") as file_obj:
         writer = csv.DictWriter(
             file_obj, fieldnames=HEADER1_COUNT, dialect="unix"
         )
diff --git a/scripts/1-fetch/wikipedia_fetch.py b/scripts/1-fetch/wikipedia_fetch.py
index dc4fca3f..da43907e 100755
--- a/scripts/1-fetch/wikipedia_fetch.py
+++ b/scripts/1-fetch/wikipedia_fetch.py
@@ -83,7 +83,7 @@ def write_data(args, tool_data):
     LOGGER.info("Saving fetched data")
     os.makedirs(PATHS["data_phase"], exist_ok=True)
 
-    with open(FILE_LANGUAGES, "w", newline="", encoding="utf-8") as file_obj:
+    with open(FILE_LANGUAGES, "w", encoding="utf-8", newline="\n") as file_obj:
         writer = csv.DictWriter(
             file_obj, fieldnames=HEADER_LANGUAGES, dialect="unix"
         )
diff --git a/scripts/shared.py b/scripts/shared.py
index 26f4a1ba..0b0d0810 100644
--- a/scripts/shared.py
+++ b/scripts/shared.py
@@ -239,7 +239,7 @@ def update_readme(
     entry_end_line = f"\n"
 
     if os.path.exists(readme_path):
-        with open(readme_path, "r") as f:
+        with open(readme_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
     else:
         lines = []
@@ -327,7 +327,7 @@ def update_readme(
     )
 
     # Write back to the README.md file
-    with open(readme_path, "w") as f:
+    with open(readme_path, "w", encoding="utf-8", newline="\n") as f:
        f.writelines(lines)
 
     logger.info(f"README path: {readme_path.replace(paths['repo'], '.')}")
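
Note on the change (this paragraph and the sketch below are commentary, not part of the patch): without an explicit encoding argument, Python's open() falls back to the locale's preferred encoding, which differs between machines, and without a newline argument the text layer translates "\n" to os.linesep on write, so even the csv "unix" dialect ends up emitting "\r\n" on Windows. Below is a minimal sketch of the behavior the patch pins down, assuming CPython's documented open()/csv semantics; the output file name and row value are made up for illustration, while the TOOL_IDENTIFIER/COUNT field names are taken from append_data() above.

import csv
import locale

# What open() would silently use if encoding were omitted; this varies
# by platform and user locale (e.g. cp1252 on many Windows hosts).
print(locale.getpreferredencoding(False))

fieldnames = ["TOOL_IDENTIFIER", "COUNT"]
with open("example.csv", "w", encoding="utf-8", newline="\n") as file_obj:
    # newline="\n" disables newline translation, so the "unix" dialect's
    # "\n" line terminators reach the file unchanged and the output is
    # byte-identical across platforms.
    writer = csv.DictWriter(file_obj, fieldnames=fieldnames, dialect="unix")
    writer.writeheader()
    writer.writerow({"TOOL_IDENTIFIER": "licenses/by/4.0", "COUNT": 1})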