Skip to content

Commit f55c3c3

Browse files
committed
Prioritize root accounts and accounts with more tweets
1 parent d0c1bfa commit f55c3c3

File tree

1 file changed

+35
-16
lines changed

1 file changed

+35
-16
lines changed

use_botometer.py

Lines changed: 35 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
import os
33
import json
44
import argparse
5+
from collections import Counter
56

67
from dotenv import load_dotenv
78

@@ -39,42 +40,60 @@
3940
parser.add_argument('--dry', dest='dry_run', action='store_true',
4041
help='make a dry run (without using the API)')
4142

42-
parser.add_argument('--max-calls', dest='max_calls', type=int, default=2000,
43+
parser.add_argument('--max', dest='max_calls', type=int, default=2000,
4344
help=("maximum number of Botometer API calls "
44-
"(default: 2000, which is Botometer API's free daily limit)"))
45+
"(default: 2000, which is Botometer API's free daily limit); "
46+
"priority will be given to the original tweet's author, "
47+
"and then the most frequently appeared accounts"))
4548

4649
args = parser.parse_args()
4750

48-
accounts = set()
51+
all_accounts = Counter()
4952
for file in args.conversation_files:
5053
with open(file, 'r') as f:
5154
data = json.load(f)
5255
tweets = data["data"]
53-
accounts |= set(tweet["author_id"] for tweet in tweets) # Twitter API v2
56+
tweets.sort(key=lambda t: t['id'])
57+
orig_author = tweets[0]["author_id"]
58+
all_accounts[orig_author] = float('inf')
59+
all_accounts |= Counter(tweet["author_id"] for tweet in tweets) # Twitter API v2
5460

5561
output_file = "outputs/botometer_cache.json"
5662

5763
with open(output_file, 'r') as f:
58-
all_output = json.load(f)
59-
prev_accounts = set(all_output)
64+
checked_accounts = json.load(f)
6065

61-
new_accounts = accounts - prev_accounts
62-
if len(new_accounts) > args.max_calls:
63-
new_accounts = set(list(new_accounts)[:args.max_calls])
66+
for acc in checked_accounts:
67+
if acc in all_accounts:
68+
all_accounts[acc] = 0
69+
70+
unchecked_accounts = all_accounts.most_common(args.max_calls)
71+
new_accounts = list(k for k, v in unchecked_accounts if v > 0)
72+
#if len(new_accounts) > args.max_calls:
73+
# new_accounts = set(list(new_accounts)[:args.max_calls])
6474

6575
if args.dry_run:
66-
print(new_accounts)
76+
print(all_accounts.most_common(args.max_calls))
6777
print(len(new_accounts))
6878
else:
69-
new_output = {screen_name: result for screen_name, result in bom.check_accounts_in(new_accounts)}
70-
[print(f"{k}: {v}") for k, v in new_output.items()]
71-
print(f"Num. of new accounts added: {len(new_accounts)}")
72-
all_output.update(new_output)
73-
serialized_json = json.dumps(all_output,
79+
count = 0
80+
try:
81+
for screen_name, result in bom.check_accounts_in(new_accounts):
82+
checked_accounts[screen_name] = result
83+
count += 1
84+
print(f"Checked {count} accounts\n")
85+
print(f"{screen_name}: {result}\n")
86+
except KeyboardInterrupt: # gracefully exit if user presses Ctrl-C
87+
print()
88+
89+
#[print(f"{k}: {v}\n") for k, v in new_output.items()]
90+
print(f"Num. of new accounts added: {count}")
91+
92+
serialized_json = json.dumps(checked_accounts,
7493
indent=4,
7594
sort_keys=True)
7695

77-
print(f"Total num. of accounts checked: {len(all_output)}")
96+
print(f"Total num. of accounts checked: {len(checked_accounts)}")
7897

7998
with open(output_file, 'w') as f:
8099
f.writelines(serialized_json)

0 commit comments

Comments (0)