-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcleanEmpPositions.py
More file actions
44 lines (32 loc) · 1.24 KB
/
cleanEmpPositions.py
File metadata and controls
44 lines (32 loc) · 1.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
__author__ = 'Akhil'
import re

import pandas as pd
# Column names written as the header row of the intermediate CSV.
names = ["RawEmail", "RawEmpName", "Position", "Role"]

# A run of 2 to 101 consecutive spaces counts as one field separator; single
# spaces are left alone.  The upper bound of 101 is kept on purpose so that
# very long runs are split exactly as the earlier replace-loop (101 spaces
# down to 2) split them, keeping temp.csv unchanged.
_FIELD_GAP = re.compile(r" {2,101}")

# Convert the downloaded listing into temp.csv: write the header, then copy
# each line with tabs and space runs turned into commas.  The `with` block
# closes both files even if reading or writing raises part-way through.
with open("enronEmployeePositionsDownload.txt") as raw_file, \
        open("temp.csv", "w") as csv_file:
    csv_file.write(",".join(names) + "\n")
    for line in raw_file:
        # Tabs are converted first, so they also break up adjacent space runs.
        csv_file.write(_FIELD_GAP.sub(",", line.replace("\t", ",")))
def _to_enron_address(raw_name):
    """Lower-case *raw_name*, turn spaces into dots and append "@enron.com"."""
    dotted = raw_name.lower().replace(" ", ".")
    return dotted + "@enron.com"


# Load the intermediate CSV; its first row supplies the column names.
data = pd.read_csv("temp.csv", header=0)
# Derive an address for every row from the RawEmpName column.
data["Email"] = data["RawEmpName"].apply(_to_enron_address)
# Keep one row per derived address, then save without the index column.
data = data.drop_duplicates(subset="Email")
data.to_csv("empPos.csv", index=False)
# Combine the e-mail records with the position/role of both parties.
emailData = pd.read_csv("enronDataCleaned.csv")
# Parenthesised single-argument print runs on both Python 2 and Python 3.
print(emailData.shape)

# Two views of the employee table, one keyed for the sender and one for the
# receiver.  rename() returns a new frame, so `data` itself is not modified.
position_columns = data[["Email", "Position", "Role"]]
data_s = position_columns.rename(
    columns={'Email': 'SenderEmail', "Position": "Position_s", "Role": "Role_s"})
data_r = position_columns.rename(
    columns={'Email': 'ReceiverEmail', "Position": "Position_r", "Role": "Role_r"})

# Left merges keep every e-mail row; addresses with no matching employee get
# missing values in the added Position_*/Role_* columns.
allData = pd.merge(emailData, data_s, how="left", on="SenderEmail")
allData = pd.merge(allData, data_r, how="left", on="ReceiverEmail")
print(allData.shape)

# NOTE(review): unlike empPos.csv, this file is written with the DataFrame
# index as its first column (index=False is not passed) - confirm whether
# downstream readers rely on that before changing it.
allData.to_csv("enronDataWithPositions.csv")