
Commit bec1e9d

latestpyqsorter done
1 parent a9a0492 commit bec1e9d

File tree

4 files changed: +148 -2 lines changed

Backend/LatestSorter.py

Lines changed: 146 additions & 0 deletions
@@ -0,0 +1,146 @@
from fastapi import APIRouter
import boto3
import openai
import time
from botocore.exceptions import ClientError

# Number of modules in the syllabus
number = 4
s3_access_key = "AKIAZTHHIOR4JJ5HLTUB"
s3_secret_access_key = "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"
s3_bucket_name = "learnmateai"

s3 = boto3.client("s3", aws_access_key_id=s3_access_key, aws_secret_access_key=s3_secret_access_key)

# Set up OpenAI API credentials
openai.api_key = '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'


def batch_text(input_text, delimiter="Module"):
    # Split the model output into one batch per module; fall back to the
    # all-caps delimiter if the first split produces too few pieces.
    batches = input_text.split(delimiter)
    cleaned_batches = [batch.strip() for batch in batches if batch.strip()]
    if len(cleaned_batches) < 3:
        batches = input_text.split("MODULE")
        cleaned_batches = [batch.strip() for batch in batches if batch.strip()]
    return cleaned_batches


def upload_to_s3(bucket_name, folder_name, file_name, content):
    s3 = boto3.client('s3')
    key = folder_name + '/' + file_name
    s3.put_object(Body=content, Bucket=bucket_name, Key=key)


def get_text_from_s3(bucket_name, file_name, encoding='utf-8'):
    response = s3.get_object(Bucket=bucket_name, Key=file_name)
    content = response['Body'].read()

    try:
        text_content = content.decode(encoding)
    except UnicodeDecodeError:
        # Handle decoding errors gracefully by trying an alternative encoding
        text_content = content.decode('latin-1')

    return text_content


app = APIRouter()


@app.get("/sorter")
def process_files():
    # Read one question-paper file from S3, have the OpenAI API sort its
    # questions by syllabus module, and write the results to Sorted_PYQS/.
    def process_file(file_name):
        # Read the file from S3
        response = s3.get_object(Bucket='learnmateai', Key='pyqs_txt/' + file_name)
        file_content = response['Body'].read().decode('utf-16-le')

        # Split file content into batches (adjust batch size as needed)
        batch_size = 30000
        batches = [file_content[i:i + batch_size] for i in range(0, len(file_content), batch_size)]

        response2 = s3.get_object(Bucket='learnmateai', Key="syllabus_pdf/syllabus.txt")
        topics = response2['Body'].read().decode('utf-8')

        # Process batches
        for batch in batches:
            # Send the batch to the OpenAI API
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {
                        "role": "user",
                        "content": f"I will feed you a question paper as text. Sort the questions in the text below based on this syllabus, which has {number} modules: {topics}. The output should look exactly like 'MODULE: questions', with every question clustered under its module. It must contain exactly {number} 'MODULE' headings, each followed by its questions, and every MODULE must appear even if it is empty. Never list a question without its module; always group questions under a module:\n\n{batch}\n\n"
                    }
                ]
            )

            important_topics = response.choices[0].message.content
            text_batches = batch_text(important_topics)

            bucket_name = 'learnmateai'
            folder_name = 'Sorted_PYQS/'

            try:
                # Append each module's questions to its existing ModuleN.txt file
                for i, module_text in enumerate(text_batches):
                    key = folder_name + "Module" + str(i + 1) + ".txt"
                    response = s3.get_object(Bucket=bucket_name, Key=key)
                    current_content = response['Body'].read().decode('utf-8')

                    updated_content = current_content + module_text

                    # Upload the updated content to S3
                    s3.put_object(Bucket=bucket_name, Key=key, Body=updated_content.encode('utf-8'))
                    print(f"File uploaded to '{bucket_name}/{folder_name}'")

            except ClientError as e:
                if e.response['Error']['Code'] == 'NoSuchKey':
                    # The module files do not exist yet, so create them from scratch
                    print("File not found in S3 bucket.")
                    for i, module_text in enumerate(text_batches):
                        key = folder_name + "Module" + str(i + 1) + ".txt"
                        s3.put_object(Bucket=bucket_name, Key=key, Body=module_text.encode('utf-8'))
                        print(f"File uploaded to '{bucket_name}/{folder_name}'")
                else:
                    print("An error occurred:", e)

            # Add a delay of 20 seconds to handle the OpenAI rate limit
            time.sleep(20)

    # Get the list of files in the "pyqs_txt" folder
    response = s3.list_objects_v2(Bucket='learnmateai', Prefix='pyqs_txt/')

    # Process each file
    for file in response['Contents']:
        file_name = file['Key'].split('/')[-1]
        process_file(file_name)

    return {"message": "File processing completed."}
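
As a quick illustration (not part of the commit), the new /sorter route could be exercised with FastAPI's TestClient once the router is mounted as in the app.py change below; the import path app and the expected JSON body are assumptions based on this diff.

from fastapi.testclient import TestClient  # ships with FastAPI (needs httpx installed)
from app import app  # assumption: the FastAPI instance wired up in app.py below

client = TestClient(app)
response = client.get("/sorter")  # runs the S3 read -> OpenAI sort -> S3 write pipeline
print(response.json())  # expected: {"message": "File processing completed."}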
4.35 KB
Binary file not shown.

__pycache__/app.cpython-310.pyc

56 Bytes
Binary file not shown.

app.py

Lines changed: 2 additions & 2 deletions
@@ -2,7 +2,7 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from mangum import Mangum
-#from Backend.pyqsorter import router as api1_router
+from Backend.LatestSorter import app as sorter
 #from Backend.summariser import router_summariser as summariser

 #from Backend.Notes_Analyser import router as api4_router
@@ -30,7 +30,7 @@
 # Mount the API routerss
 #app.include_router(api1_router)

-#app.include_router(sorter)
+app.include_router(sorter)
 #app.include_router(api4_router)
 app.include_router(chunker)
 #app.include_router(notestotxt)
