
Commit dd2ad7d

added code

1 parent 7a2c0bf commit dd2ad7d

84 files changed, +3325 -175 lines changed


.gitignore

Lines changed: 4 additions & 0 deletions
@@ -3,3 +3,7 @@ Backend/requirements.txt
 dat.txt
 temp.txt
 Backend/test.py
+main.py
+iteration3/
+test_files/
+Backend/test/
Backend/Final_LatestSorter.py

Lines changed: 164 additions & 0 deletions
@@ -0,0 +1,164 @@
from fastapi import APIRouter
import boto3
import openai
import time
from botocore.exceptions import ClientError

number = 4
s3_access_key = ""
s3_secret_access_key = ""
s3_bucket_name = "learnmateai"

s3 = boto3.client("s3", aws_access_key_id=s3_access_key, aws_secret_access_key=s3_secret_access_key)

# Set up OpenAI API credentials
openai.api_key = ''


def batch_text(input_text, delimiter="Module"):
    # Split the model output into per-module chunks; fall back to the
    # upper-case delimiter if the first split yields too few pieces.
    batches = input_text.split(delimiter)
    cleaned_batches = [batch.strip() for batch in batches if batch.strip()]
    if len(cleaned_batches) < 3:
        batches = input_text.split("MODULE")
        cleaned_batches = [batch.strip() for batch in batches if batch.strip()]
    return cleaned_batches


def upload_to_s3(bucket_name, folder_name, file_name, content):
    s3 = boto3.client('s3')
    key = folder_name + '/' + file_name
    s3.put_object(Body=content, Bucket=bucket_name, Key=key)


def get_text_from_s3(bucket_name, file_name, encoding='utf-8'):
    response = s3.get_object(Bucket=bucket_name, Key=file_name)
    content = response['Body'].read()

    try:
        text_content = content.decode(encoding)
    except UnicodeDecodeError:
        # Handle decoding errors gracefully by trying an alternative encoding
        text_content = content.decode('latin-1')

    return text_content


app = APIRouter()


@app.get("/sorter")
def process_files(user: str):
    user = user + "/"
    # Make an API request with a reset message
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "user",
                "content": "forget everything told before by me"
            }
        ]
    )
    print("resetting")

    # Read one question-paper file from S3, sort its questions by module,
    # and append the result to the per-module files under Sorted_PYQS/.
    def process_file(file_name, user1):
        # Read file from S3
        print(user1)
        response = s3.get_object(Bucket='learnmateai', Key=user1 + 'pyqs_txt/' + file_name)
        file_content = response['Body'].read().decode('utf-8')

        # Split file content into batches (adjust batch size as needed)
        batch_size = 30000
        batches = [file_content[i:i + batch_size] for i in range(0, len(file_content), batch_size)]

        print(user1 + "syllabus_pdf/syllabus.txt")
        response2 = s3.get_object(Bucket='learnmateai', Key=user1 + "syllabus_pdf/syllabus.txt")
        topics = response2['Body'].read().decode('utf-8')

        # Process batches
        for batch in batches:
            # Send batch to OpenAI API
            print(batch)

            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {
                        "role": "user",
                        "content": f"I will feed you a question paper as text. Sort the questions in the text below based on this syllabus having {number} modules: {topics} (the output should look exactly like MODULE:questions). All questions should cluster under their module, the output should contain exactly {number} MODULE headings with the questions written under each, and every MODULE should appear even if it is empty. Never give a question separately from its module tag; questions must always be grouped under a module. Any output you give should only come from the text below, and you should not create any new question:\n\n{batch}\n\n"
                    }
                ]
            )

            important_topics = response.choices[0].message.content
            # Split the sorted output back into one chunk per module
            text_batches = batch_text(important_topics)

            bucket_name = 'learnmateai'
            folder_name = user1 + 'Sorted_PYQS/'

            i = 0
            try:
                # Append each module's questions to its existing ModuleN.txt file
                for module_text in text_batches:
                    print(module_text)
                    new_content = module_text
                    response = s3.get_object(Bucket=bucket_name, Key=folder_name + "Module" + str(i + 1) + ".txt")
                    current_content = response['Body'].read().decode('utf-8')
                    updated_content = current_content + new_content

                    # Upload the updated content to S3
                    s3.put_object(Bucket=bucket_name, Key=folder_name + "Module" + str(i + 1) + ".txt", Body=updated_content.encode('utf-8'))

                    # Print uploaded file information
                    print(f"File uploaded to '{bucket_name}/{folder_name}'")
                    i = i + 1

                # Add a delay of 20 seconds to handle the rate limit
                time.sleep(20)

            except ClientError as e:
                if e.response['Error']['Code'] == 'NoSuchKey':
                    print("File not found in S3 bucket.")
                    # The module files do not exist yet, so create them instead of appending
                    for module_text in text_batches:
                        print(module_text)
                        updated_content = module_text

                        # Upload the content to S3
                        s3.put_object(Bucket=bucket_name, Key=folder_name + "Module" + str(i + 1) + ".txt", Body=updated_content.encode('utf-8'))

                        # Print uploaded file information
                        print(f"File uploaded to '{bucket_name}/{folder_name}'")
                        i = i + 1
                else:
                    print("An error occurred:", e)

    # Get the list of files in the "pyqs_txt" folder
    response = s3.list_objects_v2(Bucket='learnmateai', Prefix=user + 'pyqs_txt/')

    # Process each file
    for file in response['Contents']:
        print(file)
        file_name = file['Key'].split('/')[-1]
        print(file_name)
        process_file(file_name, user)

    return {"message": "PYQS SORTED"}
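For context, a router built with APIRouter like the one above only becomes callable once it is mounted on a FastAPI application. The application entry point is not part of this diff (main.py is even ignored above), so the following is only a minimal sketch of the assumed wiring; the module name and app object are hypothetical.

# Minimal sketch, assuming the router above lives in Backend/Final_LatestSorter.py
# and is mounted on a FastAPI app (not shown in this commit).
from fastapi import FastAPI

import Final_LatestSorter  # hypothetical import path

app = FastAPI()
app.include_router(Final_LatestSorter.app)

# Run with: uvicorn main:app --reload
# The sorter can then be invoked as GET /sorter?user=<username>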

Backend/Final_NotesChunker.py

Lines changed: 84 additions & 0 deletions
@@ -0,0 +1,84 @@
from fastapi import APIRouter
import boto3
import openai
import time

s3_access_key = ""
s3_secret_access_key = ""
s3_bucket_name = "learnmateai"

s3 = boto3.client("s3", aws_access_key_id=s3_access_key, aws_secret_access_key=s3_secret_access_key)

# Set up OpenAI API credentials
openai.api_key = ''


def batch_text(input_text, delimiter="TOPIC:"):
    # Split the model output into one chunk per topic
    batches = input_text.split(delimiter)
    cleaned_batches = [batch.strip() for batch in batches if batch.strip()]
    return cleaned_batches


def upload_to_s3(bucket_name, folder_name, file_name, content):
    s3 = boto3.client('s3')
    key = folder_name + '/' + file_name
    s3.put_object(Body=content, Bucket=bucket_name, Key=key)


app = APIRouter()


@app.get("/process_files")
def process_files(user: str):
    user = user + "/"

    # Read one notes file from S3, summarise it topic-wise, and upload one
    # file per topic under Analysed_Notes/<file>/.
    def process_file(file_name):
        # Read file from S3
        response = s3.get_object(Bucket='learnmateai', Key=user + 'notes_txt/' + file_name)
        file_content = response['Body'].read().decode('utf-8')

        # Split file content into batches (adjust batch size as needed)
        batch_size = 3000
        batches = [file_content[i:i + batch_size] for i in range(0, len(file_content), batch_size)]

        # Process batches
        for batch in batches:
            # Send batch to OpenAI API
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {
                        "role": "user",
                        "content": f"Divide the text topic-wise (it should look like TOPIC:notes). The notes should be very brief and written so that the full text can be recreated from them:\n\n{batch}\n\n"
                    }
                ]
            )

            important_topics = response.choices[0].message.content
            # Add a delay of 20 seconds to handle the rate limit
            time.sleep(20)

            text_batches = batch_text(important_topics)

            bucket_name = 'learnmateai'
            file = file_name.split(".")[0]
            folder_name = f'{user}Analysed_Notes/{file}'

            for i, topic_chunk in enumerate(text_batches):
                # The first line of each chunk is the topic name; the rest is its notes
                lines = topic_chunk.split('\n')
                file_name1 = lines[0].strip().replace(" ", "_") + '.txt'
                content = '\n'.join(lines[1:]).strip()
                upload_to_s3(bucket_name, folder_name, file_name1, content)

                # Print uploaded file information
                print(f"File '{file_name1}' uploaded to '{bucket_name}/{folder_name}'")

    # Get the list of files in the "notes_txt" folder
    response = s3.list_objects_v2(Bucket='learnmateai', Prefix=user + 'notes_txt/')

    # Process each file
    for file in response['Contents']:
        file_name = file['Key'].split('/')[-1]
        process_file(file_name)

    return {"message": "NOTES"}
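As a quick illustration of the batch_text helper used above, the sketch below shows how a model response containing "TOPIC:" markers is split into per-topic chunks before upload; the sample text is hypothetical.

# Hypothetical example of how batch_text splits a model response.
sample = "TOPIC: Stacks\npush/pop notes\nTOPIC: Queues\nFIFO notes"
chunks = batch_text(sample, delimiter="TOPIC:")
# chunks == ['Stacks\npush/pop notes', 'Queues\nFIFO notes']
# The first line of each chunk becomes the uploaded file name,
# and the remaining lines become that file's content.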
