-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_extraction.py
More file actions
50 lines (43 loc) · 996 Bytes
/
data_extraction.py
File metadata and controls
50 lines (43 loc) · 996 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
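'''
Data extraction: read the first 500 lines of the tab-separated file
'en-development', then write the second column to 'data_1' and the third
column to 'class_1', one entry per line.
'''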
import re
# Maximum number of lines taken from the top of the input file.
MAX_ROWS = 500
INPUT_PATH = 'en-development'
# Raw strings keep every backslash literal.  The resulting paths are exactly
# the ones used before, but no longer depend on unrecognised escape sequences
# (\D, \I, \d, \c) being passed through unchanged by the interpreter.
DATA_OUTPUT_PATH = r'C:\Users\talk2\Desktop\Industrial Training ML\dev\classifier\data_1'
CLASS_OUTPUT_PATH = r'C:\Users\talk2\Desktop\Industrial Training ML\dev\classifier\class_1'


def _read_columns(path, limit):
    '''Return the second and third tab-separated columns of a file.

    At most `limit` lines are read, starting from the top of the file.
    Reading stops early at end of file instead of failing on the empty
    string that readline() returns there.

    Returns a pair of lists (second_column, third_column).
    Raises IndexError if a line has fewer than three tab-separated fields.
    '''
    second_column = []
    third_column = []
    # The context manager closes the input file even if a line is malformed.
    with open(path, 'r') as source:
        for _ in range(limit):
            line = source.readline()
            if not line:
                break  # the file holds fewer than `limit` lines
            # Remove the line terminator before splitting, so that a field is
            # never shortened when the final line has no trailing newline or
            # when more columns follow the third one.
            fields = line.rstrip("\n").split("\t")
            second_column.append(fields[1])
            third_column.append(fields[2])
    return second_column, third_column


def _strip_quotes(text):
    '''Drop the first and last character of a value that starts with a double quote.'''
    if text.startswith('"'):
        return text[1:-1]
    return text


def _format_classes(field):
    '''Turn a comma-separated class field into a space-separated one.

    A field without a comma is returned unchanged.  Otherwise the part
    between the first pair of double quotes is used (or the whole field when
    it contains no quotes) and its first two comma-separated values are
    joined with a single space.
    '''
    if "," not in field:
        return field
    pieces = field.split('"')
    quoted = pieces[1] if len(pieces) > 1 else field
    # NOTE(review): only the first two values are kept, as before; confirm
    # that a field never carries more than two classes.
    return " ".join(quoted.split(",")[:2])


def _write_lines(path, lines):
    '''Replace the contents of `path` with `lines`, one entry per line.'''
    # Mode 'w' already truncates the file, so a separate truncate-then-append
    # sequence is not needed.
    with open(path, 'w') as target:
        for entry in lines:
            target.write(entry)
            target.write("\n")


# XX holds the raw second-column values, YY the raw third-column values.
XX, YY = _read_columns(INPUT_PATH, MAX_ROWS)
_write_lines(DATA_OUTPUT_PATH, [_strip_quotes(value) for value in XX])
_write_lines(CLASS_OUTPUT_PATH, [_format_classes(value) for value in YY])
# The parenthesised form prints the same text on Python 2 and Python 3.
print("Data pre-processed successfully")