@@ -260,11 +260,11 @@ def process_totals_by_restrictions(args, count_data):
260260 data_to_csv (args , data , file_path )
261261
262262
263- def process_totals_by_langauage (args , data ):
263+ def process_totals_by_language (args , data ):
264264 """
265265 Processing language data: totals by language
266266 """
267- LOGGER .info (process_totals_by_langauage .__doc__ .strip ())
267+ LOGGER .info (process_totals_by_language .__doc__ .strip ())
268268 data = data .groupby (["LANGUAGE" ], as_index = False )["COUNT" ].sum ()
269269 data = data .sort_values ("COUNT" , ascending = False )
270270 data .reset_index (drop = True , inplace = True )
@@ -276,7 +276,7 @@ def process_totals_by_langauage(args, data):
276276 inplace = True ,
277277 )
278278 file_path = shared .path_join (
279- PATHS ["data_phase" ], "gcs_totals_by_langauage .csv"
279+ PATHS ["data_phase" ], "gcs_totals_by_language .csv"
280280 )
281281 data_to_csv (args , data , file_path )
282282
@@ -302,28 +302,6 @@ def process_totals_by_country(args, data):
302302 data_to_csv (args , data , file_path )
303303
304304
305- # Data is already limited to licenses 4.0, CC0, and PDM
306- #
307- # def process_license_40_totals_by_langauage(args, data):
308- # LOGGER.info("Processing language data: top 25 languages")
309- # data = data[data["TOOL_IDENTIFIER"].str.contains("CC BY")]
310- # data = data[data["TOOL_IDENTIFIER"].str.contains("4.0")]
311- # data = data.groupby(["LANGUAGE"], as_index=False)['COUNT'].sum()
312- # data = data.sort_values("COUNT", ascending=False)
313- # data.reset_index(drop=True, inplace=True)
314- # data.rename(
315- # columns={
316- # "LANGUAGE": "Language",
317- # "COUNT": "Count",
318- # },
319- # inplace=True,
320- # )
321- # file_path = shared.path_join(
322- # PATHS["data_phase"], "gcs_license_40_totals_by_langauage.csv"
323- # )
324- # data_to_csv(args, data, file_path)
325-
326-
327305# def load_quarter_data(quarter):
328306# """
329307# Load data for a specific quarter.
@@ -348,7 +326,7 @@ def process_totals_by_country(args, data):
348326
349327# # Process the data to compare by country
350328# compare_by_country(current_data, previous_data,
351- # current_quarter, previous_quarter)
329+ # current_quarter, previous_quarter)
352330
353331# # Process the data to compare by license
354332# compare_by_license(current_data, previous_data,
@@ -360,7 +338,7 @@ def process_totals_by_country(args, data):
360338
361339
362340# def compare_by_country(current_data, previous_data,
363- # current_quarter, previous_quarter):
341+ # current_quarter, previous_quarter):
364342# """
365343# Compare the number of webpages licensed by country between two quarters.
366344# """
@@ -460,8 +438,7 @@ def main():
460438 language_data = pd .read_csv (
461439 FILE2_LANGUAGE , usecols = ["TOOL_IDENTIFIER" , "LANGUAGE" , "COUNT" ]
462440 )
463- process_totals_by_langauage (args , language_data )
464- # process_license_40_totals_by_langauage(args, language_data)
441+ process_totals_by_language (args , language_data )
465442
466443 # Country data
467444 country_data = pd .read_csv (
0 commit comments