22import os
33import json
44import argparse
5+ from collections import Counter
56
67from dotenv import load_dotenv
78
# Dry-run switch: report what would be checked without calling the API.
parser.add_argument('--dry', dest='dry_run', action='store_true',
                    help='make a dry run (without using the API)')

# Cap on the number of API calls made in one run.  '--max-calls' is the
# option's earlier spelling; it is kept as an alias because argparse prefix
# matching only accepts *shorter* spellings, so the old flag would otherwise
# be rejected.
parser.add_argument('--max', '--max-calls', dest='max_calls', type=int, default=2000,
                    help=("maximum number of Botometer API calls "
                          "(default: 2000, which is Botometer API's free daily limit); "
                          "priority will be given to the original tweet's author, "
                          "and then the most frequently appeared accounts"))

args = parser.parse_args()
4750
# Tally how often each author id appears across all conversation files.
# The author of each conversation's earliest tweet is given an infinite
# count so that it always ranks ahead of every other account.
all_accounts = Counter()
for file in args.conversation_files:
    with open(file, 'r') as f:
        data = json.load(f)
    tweets = data["data"]
    if not tweets:
        # An empty conversation has no original tweet and nothing to count.
        continue
    # The tweet with the smallest id is treated as the original one.  Compare
    # ids numerically: when they are serialized as strings, plain string
    # comparison misorders ids of different lengths ("999" > "1000").
    tweets.sort(key=lambda t: int(t['id']))
    orig_author = tweets[0]["author_id"]
    all_accounts[orig_author] = float('inf')
    # update() adds counts, so appearances accumulate across files (the `|`
    # operator would keep only the per-file maximum).  inf + n stays inf, so
    # original authors keep their top priority.
    all_accounts.update(tweet["author_id"] for tweet in tweets)  # Twitter API v2
5460
# Results of earlier runs are cached here, keyed by account.
output_file = "outputs/botometer_cache.json"

with open(output_file, 'r') as f:
    checked_accounts = json.load(f)

# Demote every account that already has a cached result: a zero count sorts
# last and is filtered out below.
for cached_id in checked_accounts:
    if cached_id not in all_accounts:
        continue
    all_accounts[cached_id] = 0

# Take the highest-ranked accounts (at most max_calls of them) and keep only
# those that still need to be checked.
unchecked_accounts = all_accounts.most_common(args.max_calls)
new_accounts = [account for account, appearances in unchecked_accounts
                if appearances > 0]
6474
if args.dry_run:
    # Dry run: print the ranked (account, count) pairs and the number of
    # accounts a real run would send to the API; nothing is written.
    print(all_accounts.most_common(args.max_calls))
    print(len(new_accounts))
else:
    count = 0
    try:
        # Each result is recorded under the identifier the API wrapper
        # yields with it, so the cache is updated incrementally.
        for screen_name, result in bom.check_accounts_in(new_accounts):
            checked_accounts[screen_name] = result
            count += 1
            print(f"Checked {count} accounts\n")
            print(f"{screen_name}: {result}\n")
    except KeyboardInterrupt:  # gracefully exit if user presses Ctrl-C
        print()
    finally:
        # Save in `finally` so that results already fetched are not lost
        # when the loop stops early for any reason (Ctrl-C or an unexpected
        # error); other exceptions still propagate after the cache is saved.
        print(f"Num. of new accounts added: {count}")

        # Serialize before opening the file so that a serialization failure
        # cannot truncate the existing cache.
        serialized_json = json.dumps(checked_accounts,
                                     indent=4,
                                     sort_keys=True)

        print(f"Total num. of accounts checked: {len(checked_accounts)}")

        with open(output_file, 'w') as f:
            f.write(serialized_json)
0 commit comments