@@ -291,7 +291,8 @@ def generate_polars_dataframe(
291291 )
292292
293293 tweet_df1 = tweet_df .with_columns (
294- pl .col ("location" ).apply (lambda x : normalise_location (x ), skip_nulls = True )
294+ pl .col ("location" ).apply (
295+ lambda x : normalise_location (x ), skip_nulls = True )
295296 )
296297 tweet_df1 = tweet_df1 .join (sal_df , on = "location" , how = "left" )
297298
@@ -362,7 +363,7 @@ def count_number_of_tweets_by_author(tdf: pl.DataFrame) -> pl.DataFrame:
362363 tdf .select ("author_id" , "tweet_id" )
363364 .groupby ("author_id" )
364365 .agg (pl .count ("tweet_id" ).alias ("tweet_count" ))
365- .sort ("tweet_count" , reverse = True )
366+ .sort ("tweet_count" , descending = True )
366367 )
367368
368369 return author_tweet_count
@@ -466,7 +467,8 @@ def generate_task_3_result(tdf: pl.DataFrame, save: bool, path: Path) -> pl.Data
466467 )
467468
468469 tdf1 = tdf1 .with_columns (
469- pl .col ("gcc_count" ).rank (method = "ordinal" , descending = True ).alias ("rank" )
470+ pl .col ("gcc_count" ).rank (method = "ordinal" ,
471+ descending = True ).alias ("rank" )
470472 )
471473 tdf1 = tdf1 .filter (pl .col ("rank" ) < 11 )
472474
@@ -498,10 +500,12 @@ def generate_task_3_result(tdf: pl.DataFrame, save: bool, path: Path) -> pl.Data
498500 ).alias ("gtc" )
499501 ]
500502 ).select ("rank" , "author_id" , "gtc" )
501- tdf4 .columns = ["Rank" , "Author Id" , "Number of Unique City Locations and #Tweets" ]
503+ tdf4 .columns = ["Rank" , "Author Id" ,
504+ "Number of Unique City Locations and #Tweets" ]
502505
503506 if save :
504- tdf4 .sort ("Rank" , descending = False ).write_csv (path / "data/result/task3.csv" )
507+ tdf4 .sort ("Rank" , descending = False ).write_csv (
508+ path / "data/result/task3.csv" )
505509 return
506510 return tdf4
507511
@@ -537,7 +541,8 @@ def concate_count_dict_with_rank_df(count_dict: dict) -> pl.DataFrame:
537541 """
538542 strings = []
539543 for key in count_dict .keys ():
540- strings .append (", " .join ([f"#{ v } { k [1 :]} " for k , v in count_dict [key ].items ()]))
544+ strings .append (
545+ ", " .join ([f"#{ v } { k [1 :]} " for k , v in count_dict [key ].items ()]))
541546
542547 return pl .DataFrame ({"author_id" : count_dict .keys (), "nugt" : strings })
543548
0 commit comments