-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathbetter_train_data.py
More file actions
40 lines (29 loc) · 1.23 KB
/
better_train_data.py
File metadata and controls
40 lines (29 loc) · 1.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# This script takes in two training sets, one with positive and one with negative data.
# It converts the two into a single CSV file with two columns: one with 0s and 1s, and another
# with the actual post.
import json
import csv
# Returns a list of dictionaries, each of which is a tweet, given a filename.
def get_data_dict(filename):
    """Return the tweets stored in *filename* as a list.

    The file is read line by line; every non-blank line is parsed with
    ``json.loads`` and the parsed values are returned in file order.

    Args:
        filename: Path of the file to read (one JSON document per line).

    Returns:
        A list with one parsed JSON value per non-blank line. The list is
        empty when the file has no non-blank lines.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8 by specification, so do not depend on the platform's
    # default encoding when decoding the file.
    with open(filename, 'r', encoding='utf-8') as infile:
        # Blank lines (e.g. a stray newline at the end of the file) carry no
        # tweet and would make json.loads raise, so they are skipped.
        return [json.loads(line) for line in infile if line.strip()]
# NOTE: the two input paths used to be swapped (the "positive" variable read
# the negative file and vice versa), which inverted the 0/1 labels written to
# the CSV. Each variable now loads the file that matches its name.
positive_tweets = get_data_dict('Training_Data/training_positive_tweets.json')
negative_tweets = get_data_dict('Training_Data/training_negative_tweets.json')

# Keep only the post text of every tweet.
positive_text = [tweet['text'] for tweet in positive_tweets]
negative_text = [tweet['text'] for tweet in negative_tweets]

# Label column: 1 for every positive post, 0 for every negative post.
binary_positive = [1] * len(positive_text)
binary_negative = [0] * len(negative_text)

# Rows are (label, text) pairs: all positive rows first, then the negative ones.
final_lst = list(zip(binary_positive, positive_text))
final_lst.extend(zip(binary_negative, negative_text))

# newline='' hands line-ending control to the csv module (otherwise extra
# blank rows appear on Windows and newlines embedded in a post can be mangled);
# an explicit encoding keeps non-ASCII post text portable across platforms.
with open('Training_Data/training_set_better.csv', 'w', newline='', encoding='utf-8') as output:
    csv_writer = csv.writer(output)
    csv_writer.writerows(final_lst)