Commit ddb3958

Merge pull request #218 from creativecommons/encoding-utf8-newline-unix
ensure encoding (utf-8) and newline (unix) are consistent across platforms
2 parents: 6d68820 + 24d412f

File tree: 6 files changed, +14 −14 lines

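Why this matters: when encoding and newline are omitted, Python's open() falls back to the platform locale encoding and, in text write mode, translates "\n" to os.linesep, so the same script emits different bytes on Windows than on Linux or macOS. A minimal sketch of the defaults this PR takes out of play (the file name is illustrative, not from this repo):

    import locale
    import os

    # Platform-dependent defaults that implicit open() calls inherit:
    print(locale.getpreferredencoding(False))  # e.g. "cp1252" on Windows, "UTF-8" on Linux
    print(repr(os.linesep))                    # "\r\n" on Windows, "\n" elsewhere

    # Explicit arguments produce identical bytes on every platform
    # ("example.txt" is a hypothetical path):
    with open("example.txt", "w", encoding="utf-8", newline="\n") as file_obj:
        file_obj.write("same bytes everywhere\n")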

dev/create_gcs_query_plan.py

Lines changed: 4 additions & 4 deletions
@@ -117,7 +117,7 @@ def get_tool_urls():
     file_path = shared.path_join(PATHS["data"], "legal-tool-paths.txt")
     prefix = "//creativecommons.org/"
     tool_urls = []
-    with open(file_path, "r") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         for line in file_obj:
             tool_urls.append(f"{prefix}{line.strip()}")
     LOGGER.info("Prioritizing CC Legal Tool URLs")
@@ -127,14 +127,14 @@ def get_tool_urls():
 
 def load_countries():
     file_path = shared.path_join(PATHS["data"], "gcs_country_collection.yaml")
-    with open(file_path, "r") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         countries = yaml.safe_load(file_obj)
     return countries
 
 
 def load_languages():
     file_path = shared.path_join(PATHS["data"], "gcs_language_collection.yaml")
-    with open(file_path, "r") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         languages = yaml.safe_load(file_obj)
     return languages
 
@@ -209,7 +209,7 @@ def save_plan(plan):
         "LANGUAGE",
         "LR",
     ]
-    with open(file_path, "w") as file_obj:
+    with open(file_path, "w", encoding="utf-8", newline="\n") as file_obj:
         writer = csv.DictWriter(
             file_obj, fieldnames=fieldnames, dialect="unix"
         )
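The pairing of newline="\n" with the csv module's unix dialect is worth noting: that dialect quotes every field and terminates rows with "\n", and newline="\n" stops the text layer from rewriting the terminator as "\r\n" on Windows. A minimal sketch of the pattern, with made-up field names rather than the repo's actual plan schema:

    import csv

    fieldnames = ["TOOL_IDENTIFIER", "COUNTRY", "LANGUAGE"]  # illustrative only
    rows = [{"TOOL_IDENTIFIER": "by/4.0", "COUNTRY": "US", "LANGUAGE": "en"}]

    with open("plan.csv", "w", encoding="utf-8", newline="\n") as file_obj:
        # dialect="unix": QUOTE_ALL plus "\n" row terminator
        writer = csv.DictWriter(file_obj, fieldnames=fieldnames, dialect="unix")
        writer.writeheader()
        writer.writerows(rows)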

dev/prioritize_tools.py

Lines changed: 2 additions & 2 deletions
@@ -42,7 +42,7 @@ def get_tool_urls():
     file_path = shared.path_join(PATHS["data"], "legal-tool-paths.txt")
     prefix = "//creativecommons.org/"
     tool_urls = []
-    with open(file_path, "r") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         for line in file_obj:
             tool_urls.append(f"{prefix}{line.strip()}")
     return tool_urls
@@ -112,7 +112,7 @@ def save_tools_list(tool_urls):
     LOGGER.info("Saving prioritized CC Legal Tool URLs")
     file_path = shared.path_join(PATHS["data"], "prioritized-tool-urls.txt")
     tool_urls.append("")  # ensure file has end of file newline
-    with open(file_path, "w") as file_obj:
+    with open(file_path, "w", encoding="utf-8", newline="\n") as file_obj:
         file_obj.writelines("\n".join(tool_urls))
 
 
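One detail in the context lines: appending an empty string before "\n".join() is what gives the file its trailing newline, since join only inserts separators between elements. For example (the URL is illustrative):

    tool_urls = ["//creativecommons.org/licenses/by/4.0/"]
    tool_urls.append("")  # ensure file has end of file newline
    print(repr("\n".join(tool_urls)))
    # '//creativecommons.org/licenses/by/4.0/\n'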

scripts/1-fetch/gcs_fetch.py

Lines changed: 4 additions & 4 deletions
@@ -104,7 +104,7 @@ def get_search_service():
 
 def initialize_data_file(file_path, header):
     if not os.path.isfile(file_path):
-        with open(file_path, "w", newline="") as file_obj:
+        with open(file_path, "w", encoding="utf-8", newline="\n") as file_obj:
            writer = csv.DictWriter(
                file_obj, fieldnames=header, dialect="unix"
            )
@@ -127,7 +127,7 @@ def get_last_completed_plan_index():
     last_completed_plan_index = 0
     for file_path in [FILE1_COUNT, FILE2_LANGUAGE, FILE3_COUNTRY]:
         try:
-            with open(file_path, "r", newline="") as file_obj:
+            with open(file_path, "r", encoding="utf-8") as file_obj:
                 reader = csv.DictReader(file_obj, dialect="unix")
                 for row in reader:
                     pass  # skip through to last row
@@ -147,7 +147,7 @@ def get_last_completed_plan_index():
 def load_plan():
     plan = []
     file_path = shared.path_join(PATHS["data"], "gcs_query_plan.csv")
-    with open(file_path, "r", newline="") as file_obj:
+    with open(file_path, "r", encoding="utf-8") as file_obj:
         plan = list(csv.DictReader(file_obj, dialect="unix"))
     return plan
 

@@ -181,7 +181,7 @@ def append_data(args, plan_row, index, count):
         "TOOL_IDENTIFIER": plan_row["TOOL_IDENTIFIER"],
         "COUNT": count,
     }
-    with open(file_path, "a", newline="") as file_obj:
+    with open(file_path, "a", encoding="utf-8", newline="\n") as file_obj:
         writer = csv.DictWriter(
             file_obj, fieldnames=fieldnames, dialect="unix"
         )
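The read-side changes here drop newline="" as well as adding the encoding. get_last_completed_plan_index() relies on a small idiom: exhausting a csv.DictReader leaves the loop variable bound to the last row read. A rough sketch, assuming the data files carry an index-like column ("PLAN_INDEX" is a hypothetical name, not confirmed by this diff):

    import csv

    def last_completed_index(file_path):
        row = None  # stays None if the file has no data rows
        with open(file_path, "r", encoding="utf-8") as file_obj:
            reader = csv.DictReader(file_obj, dialect="unix")
            for row in reader:
                pass  # skip through to last row
        return int(row["PLAN_INDEX"]) if row else 0

One caveat worth knowing: the csv docs recommend newline="" on reads so that newlines embedded in quoted fields round-trip correctly; dropping it is presumably fine here because these data files contain no multi-line fields.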

scripts/1-fetch/github_fetch.py

Lines changed: 1 addition & 1 deletion
@@ -110,7 +110,7 @@ def write_data(args, tool_data):
         LOGGER.error("Unable to fetch all records. Aborting.")
         return args
 
-    with open(FILE1_COUNT, "w", newline="") as file_obj:
+    with open(FILE1_COUNT, "w", encoding="utf-8", newline="\n") as file_obj:
         writer = csv.DictWriter(
             file_obj, fieldnames=HEADER1_COUNT, dialect="unix"
         )

scripts/1-fetch/wikipedia_fetch.py

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@ def write_data(args, tool_data):
     LOGGER.info("Saving fetched data")
     os.makedirs(PATHS["data_phase"], exist_ok=True)
 
-    with open(FILE_LANGUAGES, "w", newline="", encoding="utf-8") as file_obj:
+    with open(FILE_LANGUAGES, "w", encoding="utf-8", newline="\n") as file_obj:
         writer = csv.DictWriter(
             file_obj, fieldnames=HEADER_LANGUAGES, dialect="unix"
         )
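This is the one call that was already explicit about encoding; the edit only standardizes the newline spelling. For writing, newline="" and newline="\n" behave the same (the former disables translation entirely, the latter maps "\n" to itself), so the bytes on disk should be unchanged. A quick check of that equivalence (file names are throwaway examples):

    import csv

    for name, newline_arg in [("out_empty.csv", ""), ("out_lf.csv", "\n")]:
        with open(name, "w", encoding="utf-8", newline=newline_arg) as f:
            csv.writer(f, dialect="unix").writerow(["a", "b"])

    with open("out_empty.csv", "rb") as f1, open("out_lf.csv", "rb") as f2:
        assert f1.read() == f2.read()  # both are b'"a","b"\n'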

scripts/shared.py

Lines changed: 2 additions & 2 deletions
@@ -239,7 +239,7 @@ def update_readme(
     entry_end_line = f"<!-- {entry_title} End -->\n"
 
     if os.path.exists(readme_path):
-        with open(readme_path, "r") as f:
+        with open(readme_path, "r", encoding="utf-8") as f:
             lines = f.readlines()
     else:
         lines = []
@@ -327,7 +327,7 @@ def update_readme(
     )
 
     # Write back to the README.md file
-    with open(readme_path, "w") as f:
+    with open(readme_path, "w", encoding="utf-8", newline="\n") as f:
         f.writelines(lines)
 
     logger.info(f"README path: {readme_path.replace(paths['repo'], '.')}")
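update_readme() reads the README with readlines() (each line keeps its "\n") and writes the list back verbatim, so the explicit encoding and newline are what keep a README regenerated on Windows byte-identical to one regenerated on Linux. The visible context suggests content is spliced between HTML comment markers; a rough sketch of that idea (the Start marker and function shape are inferred by symmetry, not the repo's actual code):

    def splice_entry(lines, entry_title, new_body_lines):
        entry_start_line = f"<!-- {entry_title} Start -->\n"
        entry_end_line = f"<!-- {entry_title} End -->\n"
        if entry_start_line in lines and entry_end_line in lines:
            i = lines.index(entry_start_line)
            j = lines.index(entry_end_line)
            return lines[: i + 1] + new_body_lines + lines[j:]
        # Markers absent: append a fresh block at the end
        return lines + [entry_start_line] + new_body_lines + [entry_end_line]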
