-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathgetData_EN.py
More file actions
36 lines (26 loc) · 803 Bytes
/
getData_EN.py
File metadata and controls
36 lines (26 loc) · 803 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
##################################################################
# This code file gets the raw tweets written in the English text #
##################################################################
import json
# Output file for the extracted tweet text. It is opened once, at module load,
# in write mode (any existing 'englishData.txt' is truncated) and is written
# to by extractTaggedText() through this module-level name.
# NOTE(review): the handle is never closed explicitly in the visible code.
f = open('englishData.txt','w')
def readData(file_name):
    """Load the tweets stored as JSON in *file_name*.

    The decoded content is stored in the module-level ``data`` variable
    (which ``extractTaggedText`` reads) and is also returned, so callers do
    not have to go through the global.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The object decoded from the JSON file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    global data
    # JSON is UTF-8 by specification; do not depend on the platform default
    # encoding, which would mis-decode non-ASCII tweets on some systems.
    with open(file_name, encoding="utf-8") as json_data:
        data = json.load(json_data)
    return data
def extractTaggedText(tweets=None, out=None):
    """Write the text of each tweet, with word tags removed, one per line.

    Each whitespace-separated token of a tweet's ``'lang_tagged_text'``
    field has its last three characters dropped (presumably a separator
    plus a two-letter language tag -- TODO confirm against the dataset).
    The remaining words are joined with single spaces and written to the
    output followed by a newline.

    Tweets that lack the field, or whose field is not a string, are skipped
    without producing any output. Errors raised while writing are not
    suppressed.

    Args:
        tweets: Iterable of tweet mappings. Defaults to the module-level
            ``data`` populated by ``readData``.
        out: Writable text stream. Defaults to the module-level file ``f``.
    """
    if tweets is None:
        tweets = data
    if out is None:
        out = f
    for tweet in tweets:
        try:
            tagged_words = tweet['lang_tagged_text'].split()
        except (KeyError, TypeError, AttributeError):
            # Missing or malformed field: skip this tweet, write nothing.
            continue
        out.write(" ".join(word[:-3] for word in tagged_words) + "\n")
    # The default output handle is never closed explicitly, so make sure the
    # buffered lines reach the file before returning.
    out.flush()
# Script driver: load the tweets from the JSON file, then write their
# extracted text to the output file. The order matters because
# extractTaggedText() reads the global that readData() populates.
file_name = 'train_codemixed.json'  # input path, resolved against the working directory
readData(file_name)
extractTaggedText()