8 changes: 4 additions & 4 deletions dev/create_gcs_query_plan.py
@@ -117,7 +117,7 @@ def get_tool_urls():
     file_path = shared.path_join(PATHS["data"], "legal-tool-paths.txt")
     prefix = "//creativecommons.org/"
     tool_urls = []
-    with open(file_path, "r") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         for line in file_obj:
             tool_urls.append(f"{prefix}{line.strip()}")
     LOGGER.info("Prioritizing CC Legal Tool URLs")
@@ -127,14 +127,14 @@ def get_tool_urls():

 def load_countries():
     file_path = shared.path_join(PATHS["data"], "gcs_country_collection.yaml")
-    with open(file_path, "r") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         countries = yaml.safe_load(file_obj)
     return countries


 def load_languages():
     file_path = shared.path_join(PATHS["data"], "gcs_language_collection.yaml")
-    with open(file_path, "r") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         languages = yaml.safe_load(file_obj)
     return languages

@@ -209,7 +209,7 @@ def save_plan(plan):
         "LANGUAGE",
         "LR",
     ]
-    with open(file_path, "w") as file_obj:
+    with open(file_path, "w", encoding="utf-8", newline="\n") as file_obj:
         writer = csv.DictWriter(
             file_obj, fieldnames=fieldnames, dialect="unix"
         )
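Context for the change above, as a brief hedged sketch: Python's open() without an explicit encoding falls back to the locale's preferred encoding, which is UTF-8 on most Linux systems but commonly cp1252 on Windows, so the same data file can decode differently per machine. The demo path below is hypothetical, not a file from this repo.

# Sketch: the default text encoding is locale-dependent, not always UTF-8.
import locale

print(locale.getpreferredencoding(False))  # e.g. "UTF-8" on Linux, "cp1252" on Windows

# With an explicit encoding, the bytes written are the same on every host
# ("demo.txt" is a hypothetical path used only for illustration):
with open("demo.txt", "w", encoding="utf-8") as file_obj:
    file_obj.write("Attribution 4.0 — “CC BY”\n")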
4 changes: 2 additions & 2 deletions dev/prioritize_tools.py
@@ -42,7 +42,7 @@ def get_tool_urls():
     file_path = shared.path_join(PATHS["data"], "legal-tool-paths.txt")
     prefix = "//creativecommons.org/"
     tool_urls = []
-    with open(file_path, "r") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         for line in file_obj:
             tool_urls.append(f"{prefix}{line.strip()}")
     return tool_urls
@@ -112,7 +112,7 @@ def save_tools_list(tool_urls):
     LOGGER.info("Saving prioritized CC Legal Tool URLs")
     file_path = shared.path_join(PATHS["data"], "prioritized-tool-urls.txt")
     tool_urls.append("") # ensure file has end of file newline
-    with open(file_path, "w") as file_obj:
+    with open(file_path, "w", encoding="utf-8", newline="\n") as file_obj:
         file_obj.writelines("\n".join(tool_urls))
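The save_tools_list() change leaves the existing end-of-file-newline idiom intact: appending an empty string before "\n".join() is what produces the trailing newline. A minimal sketch with made-up sample values:

# Minimal sketch of the trailing-newline idiom (sample URLs are illustrative).
tool_urls = [
    "//creativecommons.org/licenses/by/4.0/",
    "//creativecommons.org/publicdomain/zero/1.0/",
]
tool_urls.append("")  # empty last element => joined text ends with "\n"
assert "\n".join(tool_urls) == (
    "//creativecommons.org/licenses/by/4.0/\n"
    "//creativecommons.org/publicdomain/zero/1.0/\n"
)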
8 changes: 4 additions & 4 deletions scripts/1-fetch/gcs_fetch.py
@@ -104,7 +104,7 @@ def get_search_service():

 def initialize_data_file(file_path, header):
     if not os.path.isfile(file_path):
-        with open(file_path, "w", newline="") as file_obj:
+        with open(file_path, "w", encoding="utf-8", newline="\n") as file_obj:
             writer = csv.DictWriter(
                 file_obj, fieldnames=header, dialect="unix"
             )
@@ -127,7 +127,7 @@ def get_last_completed_plan_index():
     last_completed_plan_index = 0
     for file_path in [FILE1_COUNT, FILE2_LANGUAGE, FILE3_COUNTRY]:
         try:
-            with open(file_path, "r", newline="") as file_obj:
+            with open(file_path, "r", encoding="utf-8") as file_obj:
                 reader = csv.DictReader(file_obj, dialect="unix")
                 for row in reader:
                     pass # skip through to last row
@@ -147,7 +147,7 @@ def get_last_completed_plan_index():
 def load_plan():
     plan = []
     file_path = shared.path_join(PATHS["data"], "gcs_query_plan.csv")
-    with open(file_path, "r", newline="") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         plan = list(csv.DictReader(file_obj, dialect="unix"))
     return plan

@@ -181,7 +181,7 @@ def append_data(args, plan_row, index, count):
         "TOOL_IDENTIFIER": plan_row["TOOL_IDENTIFIER"],
         "COUNT": count,
     }
-    with open(file_path, "a", newline="") as file_obj:
+    with open(file_path, "a", encoding="utf-8", newline="\n") as file_obj:
         writer = csv.DictWriter(
             file_obj, fieldnames=fieldnames, dialect="unix"
         )
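One way to read the newline="\n" choice in these csv writes, as a hedged sketch: dialect="unix" terminates every row with "\n", and passing newline="\n" (like the newline="" the csv docs suggest) keeps the io layer from translating those terminators into "\r\n" on Windows. The file name below is hypothetical.

import csv

# Sketch: dialect="unix" quotes all fields and ends rows with "\n";
# newline="\n" prevents translation of that terminator to os.linesep.
with open("rows.csv", "w", encoding="utf-8", newline="\n") as file_obj:
    writer = csv.DictWriter(
        file_obj, fieldnames=["INDEX", "COUNT"], dialect="unix"
    )
    writer.writeheader()
    writer.writerow({"INDEX": 0, "COUNT": 42})
# rows.csv now contains exactly: "INDEX","COUNT"\n"0","42"\n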
2 changes: 1 addition & 1 deletion scripts/1-fetch/github_fetch.py
@@ -110,7 +110,7 @@ def write_data(args, tool_data):
         LOGGER.error("Unable to fetch all records. Aborting.")
         return args

-    with open(FILE1_COUNT, "w", newline="") as file_obj:
+    with open(FILE1_COUNT, "w", encoding="utf-8", newline="\n") as file_obj:
         writer = csv.DictWriter(
             file_obj, fieldnames=HEADER1_COUNT, dialect="unix"
         )
2 changes: 1 addition & 1 deletion scripts/1-fetch/wikipedia_fetch.py
@@ -83,7 +83,7 @@ def write_data(args, tool_data):
     LOGGER.info("Saving fetched data")
     os.makedirs(PATHS["data_phase"], exist_ok=True)

-    with open(FILE_LANGUAGES, "w", newline="", encoding="utf-8") as file_obj:
+    with open(FILE_LANGUAGES, "w", encoding="utf-8", newline="\n") as file_obj:
         writer = csv.DictWriter(
             file_obj, fieldnames=HEADER_LANGUAGES, dialect="unix"
         )
4 changes: 2 additions & 2 deletions scripts/shared.py
@@ -239,7 +239,7 @@ def update_readme(
     entry_end_line = f"<!-- {entry_title} End -->\n"

     if os.path.exists(readme_path):
-        with open(readme_path, "r") as f:
+        with open(readme_path, "r", encoding="utf-8") as f:
             lines = f.readlines()
     else:
         lines = []
@@ -327,7 +327,7 @@ def update_readme(
     )

     # Write back to the README.md file
-    with open(readme_path, "w") as f:
+    with open(readme_path, "w", encoding="utf-8", newline="\n") as f:
         f.writelines(lines)

     logger.info(f"README path: {readme_path.replace(paths['repo'], '.')}")
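For context on the two shared.py hunks: update_readme() reads the README into a list of lines, splices fresh content between the entry start/end marker comments, and writes the result back. A reduced sketch of that round trip, under the assumption that the markers already exist in the file; the title and path are illustrative, not taken from this repo.

# Reduced sketch of the marker-splice pattern in update_readme().
entry_title = "GCS"  # illustrative title
entry_start_line = f"<!-- {entry_title} Start -->\n"
entry_end_line = f"<!-- {entry_title} End -->\n"

with open("README.md", "r", encoding="utf-8") as f:
    lines = f.readlines()

start = lines.index(entry_start_line)
end = lines.index(entry_end_line)
lines[start + 1 : end] = ["New entry content\n"]  # replace the block body

with open("README.md", "w", encoding="utf-8", newline="\n") as f:
    f.writelines(lines)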