Skip to content

Commit 3ddc5c9

Browse files
committed
update newly merged scripts to use shared function
1 parent e50c3ec commit 3ddc5c9

File tree

2 files changed

+10
-7
lines changed

2 files changed

+10
-7
lines changed

scripts/2-process/wikipedia_process.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,9 @@ def main():
151151
file_count = shared.path_join(
152152
PATHS["data_1-fetch"], "wikipedia_count_by_languages.csv"
153153
)
154-
count_data = pd.read_csv(file_count, usecols=["LANGUAGE_NAME_EN", "COUNT"])
154+
count_data = shared.open_data_file(
155+
LOGGER, file_count, usecols=["LANGUAGE_NAME_EN", "COUNT"]
156+
)
155157
process_language_representation(args, count_data)
156158
process_highest_language_usage(args, count_data)
157159
process_least_language_usage(args, count_data)

scripts/3-report/wikipedia_report.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111
import traceback
1212

1313
# Third-party
14-
import pandas as pd
1514
from pygments import highlight
1615
from pygments.formatters import TerminalFormatter
1716
from pygments.lexers import PythonTracebackLexer
@@ -87,9 +86,11 @@ def wikipedia_intro(args):
8786
)
8887
name_label = "LANGUAGE_NAME_EN"
8988
name_label_top10 = "Language"
90-
data = pd.read_csv(file_path, index_col=name_label)
89+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
9190
total_articles = data["COUNT"].sum()
92-
top10 = pd.read_csv(file_path_top10, index_col=name_label_top10)
91+
top10 = shared.open_data_file(
92+
LOGGER, file_path_top10, index_col=name_label_top10
93+
)
9394
top10_articles = top10["Count"].sum()
9495
top10_percentage = (top10_articles / total_articles) * 100
9596
average_articles = total_articles / len(data)
@@ -131,7 +132,7 @@ def plot_language_representation(args):
131132
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
132133
name_label = "Category"
133134
data_label = "Count"
134-
data = pd.read_csv(file_path, index_col=name_label)
135+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
135136
data.sort_values(data_label, ascending=True, inplace=True)
136137
title = "Language Representation"
137138
plt = plot.combined_plot(
@@ -176,7 +177,7 @@ def plot_highest_language_usage(args):
176177
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
177178
name_label = "Language"
178179
data_label = "Count"
179-
data = pd.read_csv(file_path, index_col=name_label)
180+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
180181
data.sort_values(data_label, ascending=True, inplace=True)
181182
title = "Most represented languages"
182183
plt = plot.combined_plot(
@@ -219,7 +220,7 @@ def plot_least_language_usage(args):
219220
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
220221
name_label = "Language"
221222
data_label = "Count"
222-
data = pd.read_csv(file_path, index_col=name_label)
223+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
223224
data.sort_values(data_label, ascending=True, inplace=True)
224225
title = "Least represented languages"
225226
plt = plot.combined_plot(

0 commit comments

Comments
 (0)