forked from Nikolair1/XHealth
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsummarize.py
More file actions
74 lines (63 loc) · 3.22 KB
/
summarize.py
File metadata and controls
74 lines (63 loc) · 3.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from openai import OpenAI
import asyncio
import random
import json
# System prompt for the first completion: asks the model to bucket the raw
# tweets into four health categories (plus a discard bucket) and return one
# ~150-character summary per category as a single JSON object.
# Fix: corrected the typo "imrpove" -> "improve" in the category description.
prompt = """You are a tweet analyzer, extract all the information from these tweets that pertains to each of these categories:
*Disease Outbreaks and Public Health Emergencies* - New or breaking information about only epidemics and diseases around the world, avoid covid related updates unless they involve new variants
*Health Disparities and Equity* - Information regarding minority, marginalized, or lower-income individuals, avoid initiatives and awareness months/weeks
*Medical Research and Innovations* - advancements or studies to do with biotechnology
*Daily Health and Nutrition* - information regarding daily activities and nutrition and tips to improve daily living
*None of the above* - unnecessary information related to politics or awareness months or information that doesn't fall into the above categories
Find the 5 best tweets for each of the categories and then summarize them into a 150 character passage that uses correct grammar and is easy to follow as if there was no longer access to the tweets. Be as specific as possible while maintaining the paragraph format and end each summary with at most 2 relevant and positive hashtags. Use complete sentences and avoid semicolons and return them in the following format:
{"category1": "summary1", "category2": "summary2", "category3": "summary3", "category4": "summary4"}.
"""
# Project-local helper that posts the generated summaries to Twitter/X.
from utility.post_tweets import tweet
# Raw tweet dump to summarize, one tweet per line.
# NOTE(review): hard-coded to a single date's file — presumably updated by
# hand together with the `date` string at the bottom of the script; confirm.
file_path = "./data/april_21_tweets.txt"
async def create_summaries(date):
    """Summarize a file of health tweets into four category blurbs and post them.

    Reads the tweet dump at `file_path`, samples up to 200 random lines, asks
    the model for one short summary per category (retrying whenever any
    summary exceeds 275 characters, the tweet-length budget), generates one
    header per summary with a second completion, posts everything via
    `tweet`, and returns the list of four summaries.

    NOTE(review): declared async but contains no awaits — the OpenAI client
    calls below are synchronous and will block the event loop. Kept async so
    existing `asyncio.run(...)` callers are unaffected.

    Parameters:
        date: human-readable date string passed through to `tweet`.

    Returns:
        list[str]: the four category summaries, in the order
        [Daily Health, Disease Outbreaks, Health Disparities, Medical Research].

    Raises:
        json.JSONDecodeError / KeyError: if the model deviates from the
        requested JSON format (fail-fast, matching the original behavior).
    """
    client = OpenAI()
    # Close the file before the (slow) network calls start.
    with open(file_path, "r") as file:
        lines = file.readlines()
    # Random sample so repeated runs see a different slice of the dump.
    random.shuffle(lines)
    text = lines[:200]

    arr = []
    too_long = True
    while too_long:
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": "".join(text)},
            ],
        )
        # `summaries` instead of `dict`: the original shadowed the builtin.
        summaries = json.loads(completion.choices[0].message.content)
        arr = [
            summaries["Daily Health and Nutrition"],
            summaries["Disease Outbreaks and Public Health Emergencies"],
            summaries["Health Disparities and Equity"],
            summaries["Medical Research and Innovations"],
        ]
        # Regenerate all four if any one would overflow a tweet.
        too_long = any(len(item) > 275 for item in arr)
        if too_long:
            print("Too long! Trying again. \n")

    # Second call: a bare JSON array with one short header per summary.
    completion2 = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": 'create a python array of 4 headers, indexed by integers, 1 for each of the items in this text, just output the array like text in the format ["header1", "header2"...]',
            },
            {"role": "user", "content": "".join(arr)},
        ],
        max_tokens=4096,
    )
    headers_raw = completion2.choices[0].message.content
    print("before JSON", headers_raw)
    headers = json.loads(headers_raw)
    # Argument order preserved from the original: header list first,
    # summary list second.
    tweet(headers, arr, date)
    return arr
# Date label displayed/posted alongside the summaries.
# NOTE(review): must be kept in sync with `file_path` by hand.
date = "April 21st, 2024"

# Guarded entry point so importing this module no longer fires the network
# calls as a side effect; running it as a script behaves exactly as before.
if __name__ == "__main__":
    asyncio.run(create_summaries(date))