|
11 | 11 | import traceback |
12 | 12 |
|
13 | 13 | # Third-party |
14 | | -import pandas as pd |
15 | 14 | from pygments import highlight |
16 | 15 | from pygments.formatters import TerminalFormatter |
17 | 16 | from pygments.lexers import PythonTracebackLexer |
@@ -87,9 +86,11 @@ def wikipedia_intro(args): |
87 | 86 | ) |
88 | 87 | name_label = "LANGUAGE_NAME_EN" |
89 | 88 | name_label_top10 = "Language" |
90 | | - data = pd.read_csv(file_path, index_col=name_label) |
| 89 | + data = shared.open_data_file(LOGGER, file_path, index_col=name_label) |
91 | 90 | total_articles = data["COUNT"].sum() |
92 | | - top10 = pd.read_csv(file_path_top10, index_col=name_label_top10) |
| 91 | + top10 = shared.open_data_file( |
| 92 | + LOGGER, file_path_top10, index_col=name_label_top10 |
| 93 | + ) |
93 | 94 | top10_articles = top10["Count"].sum() |
94 | 95 | top10_percentage = (top10_articles / total_articles) * 100 |
95 | 96 | average_articles = total_articles / len(data) |
@@ -131,7 +132,7 @@ def plot_language_representation(args): |
131 | 132 | LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}") |
132 | 133 | name_label = "Category" |
133 | 134 | data_label = "Count" |
134 | | - data = pd.read_csv(file_path, index_col=name_label) |
| 135 | + data = shared.open_data_file(LOGGER, file_path, index_col=name_label) |
135 | 136 | data.sort_values(data_label, ascending=True, inplace=True) |
136 | 137 | title = "Language Representation" |
137 | 138 | plt = plot.combined_plot( |
@@ -176,7 +177,7 @@ def plot_highest_language_usage(args): |
176 | 177 | LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}") |
177 | 178 | name_label = "Language" |
178 | 179 | data_label = "Count" |
179 | | - data = pd.read_csv(file_path, index_col=name_label) |
| 180 | + data = shared.open_data_file(LOGGER, file_path, index_col=name_label) |
180 | 181 | data.sort_values(data_label, ascending=True, inplace=True) |
181 | 182 | title = "Most represented languages" |
182 | 183 | plt = plot.combined_plot( |
@@ -219,7 +220,7 @@ def plot_least_language_usage(args): |
219 | 220 | LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}") |
220 | 221 | name_label = "Language" |
221 | 222 | data_label = "Count" |
222 | | - data = pd.read_csv(file_path, index_col=name_label) |
| 223 | + data = shared.open_data_file(LOGGER, file_path, index_col=name_label) |
223 | 224 | data.sort_values(data_label, ascending=True, inplace=True) |
224 | 225 | title = "Least represented languages" |
225 | 226 | plt = plot.combined_plot( |
|
0 commit comments