Skip to content

Commit 5d6b739

Browse files
committed
Run corrections in affiliated_influence both before and after the timed affiliation matching
1 parent 5cb7c8a commit 5d6b739

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

bigbang/analysis/affiliation.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,25 @@ def affiliated_influence(arx, affiliations, corrections = {}, top_n=50):
1212
## based on email domain
1313
augment(arx)
1414

15+
# first pass at corrections: normalize the names of senders
16+
adata = arx.data.copy()
17+
adata["sender_cat"] = adata["sender_cat"].map(lambda x: corrections.get(x, x))
18+
arx = Archive(adata)
19+
20+
if ("Mankin, Allison" == arx.data["sender_cat"]).sum() > 0:
21+
import pdb; pdb.set_trace()
22+
1523
## this further looks up the email author in the affiliations table
1624
## and modifies the sender_cat column
1725
arx.data["sender_cat"] = arx.data.apply(
1826
lambda mrow: lookup_affiliation(mrow["sender_cat"], mrow["Date"], affiliations),
1927
axis=1,
2028
)
2129

30+
# second pass at corrections: normalize the names of companies.
2231
adata = arx.data.copy()
23-
2432
adata["sender_cat"] = adata["sender_cat"].map(lambda x: corrections.get(x, x))
33+
arx = Archive(adata)
2534

2635
top_ddd = aggregate_activity(Archive(adata), top_n)
2736

0 commit comments

Comments
 (0)