-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathstep-01-process_tweets.py
More file actions
46 lines (36 loc) · 1.46 KB
/
step-01-process_tweets.py
File metadata and controls
46 lines (36 loc) · 1.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
"""
This script processes the tweets dataset and saves the output to a new CSV file.
https://www.kaggle.com/datasets/crowdflower/twitter-airline-sentiment
"""
import csv
import json
def process_file(file_path):
    """Convert a tweets CSV into ``(sentence, JSON)`` pairs.

    For every data row, the ``text`` column is taken as the sentence, and the
    ``airline_sentiment`` label together with the
    ``airline_sentiment_confidence`` value are packed into a JSON object with
    the keys ``reasoning``, ``sentiment`` and ``confidence``.

    Args:
        file_path: Path to a UTF-8 encoded CSV file whose header row contains
            the columns ``text``, ``airline_sentiment`` and
            ``airline_sentiment_confidence``.

    Returns:
        A list of ``(sentence, json_string)`` tuples in file order.

    Raises:
        KeyError: If a required column is missing, or a sentiment label is
            not one of ``positive``, ``neutral`` or ``negative``.
        ValueError: If a confidence value cannot be parsed as a float.
    """
    sentiment_mapping = {"positive": 1.0, "neutral": 0.0, "negative": -1.0}
    output_data = []

    # newline="" hands line-ending handling to the csv module. Without it,
    # universal-newline translation rewrites "\r\n" / "\r" that appear inside
    # quoted fields (multi-line tweets) to "\n", silently altering the text.
    with open(file_path, "r", newline="", encoding="utf-8") as file:
        for row in csv.DictReader(file):
            sentence = row["text"]
            sentiment = row["airline_sentiment"]
            confidence = float(row["airline_sentiment_confidence"])

            # Map the textual label onto its numeric score; an unknown label
            # raises KeyError rather than being guessed at.
            numeric_sentiment = sentiment_mapping[sentiment]

            output_item = {
                "reasoning": (
                    f"The sentiment is {sentiment} based on the content of the tweet."
                ),
                "sentiment": round(numeric_sentiment, 2),
                "confidence": round(confidence, 2),
            }
            output_data.append((sentence, json.dumps(output_item)))

    return output_data
# Input: the raw tweets CSV to be converted
file_path = "data/inputs/airline_tweaks/Tweets.csv"
# Turn every row of the input into a (sentence, JSON) pair
output_data = process_file(file_path)
# Output: header line followed by one CSV row per processed tweet
output_file = "data/outputs/Processed_Tweets_output.csv"
with open(output_file, "w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    # Header and data rows go out in a single call; the header stays first.
    writer.writerows([["Sentence", "JSON"], *output_data])