
Commit 1cb7d65

Commit message: intel
1 parent e8eab4f commit 1cb7d65

11 files changed: +530 -6 lines changed

Backend/Final_Processor.py

Lines changed: 2 additions & 2 deletions

@@ -10,9 +10,9 @@ async def fetch_data(link:str,receiver_email: str ):
     async with httpx.AsyncClient(timeout=3000.0) as client:
         # Make the first API call
         response1 = await client.get(f"{link}/process_files?user={receiver_email}")
-
+        print(response1)
         response2 = await client.get(f"{link}/sorter?user={receiver_email}")
-
+        print(response2)
         response3 = await client.get(f"{link}/card-json?user={receiver_email}")

         response4 = await client.get(f"{link}/note_gen?user={receiver_email}")
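For reference, a minimal driver for the patched coroutine. The base URL and e-mail below are placeholders, not values from this commit, and the sketch assumes fetch_data is importable as Backend.Final_Processor:

import asyncio

# Hypothetical invocation; fetch_data fires the four GET requests in sequence
# and the new print() calls log each response object as it arrives.
from Backend.Final_Processor import fetch_data

asyncio.run(fetch_data("http://localhost:8000", "student@example.com"))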

Backend/Final_Questionare_Creater.py

Lines changed: 1 addition & 1 deletion

@@ -37,7 +37,7 @@ async def summarize_file(bucket_name: str, file_key: str, file_name:str):
         file_name=file_name.split(".txt")[0]
         response = s3.get_object(Bucket=bucket_name, Key=file_key)
         file_content = response['Body'].read().decode('utf-8')
-        prompt = f'create 10 mcq question with 4 option on topic: {file_name} , based on text:{file_content} \n \n output should strictly be a json with array of (question,options,correct option)'
+        prompt = f'create 10 mcq question with 4 option on topic: {file_name} , based on text:{file_content} \n \n output should strictly be a json with array of (question,options,correct option) correct option should be a integer telling which mcq is correct'
         response = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
             messages=[
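The only change here is the prompt: the model is now told to return the correct option as an integer rather than as free text. A hedged sketch of consuming that reply, assuming the model actually honours the strict-JSON instruction (gpt-3.5-turbo gives no hard guarantee):

import json

# Hypothetical downstream parsing; the field names follow the prompt's
# (question, options, correct option) wording, and the index may be 1-based
# depending on how the model reads "which mcq is correct".
raw = response.choices[0].message.content
try:
    questions = json.loads(raw)
    for q in questions:
        idx = q["correct option"]
        answer = q["options"][idx]   # or q["options"][idx - 1] if 1-based
        print(q["question"], "->", answer)
except (json.JSONDecodeError, KeyError, IndexError, TypeError):
    questions = []                   # fall back when the reply is not valid JSON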
Lines changed: 70 additions & 0 deletions

@@ -0,0 +1,70 @@
import os
import boto3
from fastapi import APIRouter
import openai
from intel_extension_pytorch import PyTorchExtension
from intel_extension_tensorflow import TensorFlowExtension
from intel_optimization_xgboost import XGBoostOptimizer
from intel_optimization_modin import ModinOptimizer

app = APIRouter()
s3_access_key = "<your_s3_access_key>"
s3_secret_access_key = "<your_s3_secret_access_key>"
s3_bucket_name = "learnmateai"

s3 = boto3.client("s3", aws_access_key_id=s3_access_key, aws_secret_access_key=s3_secret_access_key)

# Set up OpenAI API credentials
openai.api_key = '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'

# Initialize Intel libraries and tools
pytorch_extension = PyTorchExtension()
tensorflow_extension = TensorFlowExtension()
xgboost_optimizer = XGBoostOptimizer()
modin_optimizer = ModinOptimizer()

@app.get("/note_gen")
async def summarize_s3_files(user: str):
    # Summarize every file under <user>/Analysed_Notes and save the results topic-wise
    user = user + "/"
    bucket_name = "learnmateai"
    folder_name = user + "Analysed_Notes"
    try:
        response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name)
        for file in response['Contents']:
            file_key = file['Key']
            file_name = os.path.basename(file_key)
            print(file_name)
            summary = await summarize_file(bucket_name, file_key, file_name)
            print(summary)
            save_summary(file_name, summary, user)
        return {'message': 'Created Notes and saved successfully.'}
    except Exception as e:
        return {'error': str(e)}

async def summarize_file(bucket_name: str, file_key: str, file_name: str):
    try:
        file_name = file_name.split(".txt")[0]
        response = s3.get_object(Bucket=bucket_name, Key=file_key)
        file_content = response['Body'].read().decode('utf-8')
        prompt = f'You are a teacher, make a full explanation for the topic: {file_name} below in good format. Include key concepts, explanations, and any relevant information. \nMake sure to cover these topics:\n{file_content}'
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        )
        summary = response.choices[0].message.content
        return summary
    except Exception as e:
        raise e

def save_summary(file_name: str, summary: str, user):
    try:
        file_name = file_name.split(".txt")[0]
        save_key = f'{user}Notes_Topicwise/{file_name}.txt'
        s3.put_object(Body=summary, Bucket=s3_bucket_name, Key=save_key)
    except Exception as e:
        raise e
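This new router provides the /note_gen endpoint that Final_Processor.py calls above. A minimal smoke test, assuming the router is mounted on a FastAPI app served locally (neither the app nor the port is part of this commit):

import httpx

# Hypothetical direct call to the new route; "student@example.com" is a
# placeholder user prefix, not a value from the commit.
resp = httpx.get(
    "http://localhost:8000/note_gen",
    params={"user": "student@example.com"},
    timeout=3000.0,
)
print(resp.json())  # {'message': 'Created Notes and saved successfully.'} on success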
Lines changed: 83 additions & 0 deletions

@@ -0,0 +1,83 @@
from fastapi import APIRouter
import boto3
import openai
import time
import intel.scikit_learn as skl
import intel_pytorch_extension as ipex
import intel_tensorflow_extension as ifex

# Set up AWS S3 credentials
s3_access_key = "YOUR_S3_ACCESS_KEY"
s3_secret_access_key = "YOUR_S3_SECRET_ACCESS_KEY"
s3_bucket_name = "YOUR_BUCKET_NAME"

s3 = boto3.client("s3", aws_access_key_id=s3_access_key, aws_secret_access_key=s3_secret_access_key)

# Set up OpenAI API credentials
openai.api_key = 'YOUR_OPENAI_API_KEY'

def batch_text(input_text, delimiter="TOPIC:"):
    # Split the model's reply into one chunk per "TOPIC:" heading
    batches = input_text.split(delimiter)
    cleaned_batches = [batch.strip() for batch in batches if batch.strip()]
    return cleaned_batches

def upload_to_s3(bucket_name, folder_name, file_name, content):
    s3 = boto3.client('s3', aws_access_key_id=s3_access_key, aws_secret_access_key=s3_secret_access_key)
    key = folder_name + '/' + file_name
    s3.put_object(Body=content, Bucket=bucket_name, Key=key)

app = APIRouter()

@app.get("/process_files")
def process_files(user: str):
    user = user + "/"

    # Function to read and process a file
    def process_file(file_name):
        # Read file from S3
        response = s3.get_object(Bucket=s3_bucket_name, Key=user + 'notes_txt/' + file_name)
        file_content = response['Body'].read().decode('utf-8')

        # Split file content into batches (adjust batch size as needed)
        batch_size = 3000
        batches = [file_content[i:i+batch_size] for i in range(0, len(file_content), batch_size)]

        # Process batches
        for batch in batches:
            # Send batch to OpenAI API
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {
                        "role": "user",
                        "content": f"divide the text topic wise (it should look like TOPIC:notes) notes should very breif and be created in a way so that you will be able to recreate the full txt :\n\n{batch}\n\n"
                    }
                ]
            )

            important_topics = response.choices[0].message.content
            # Add a delay of 20 seconds to handle rate limit
            time.sleep(20)

            text_batches = batch_text(important_topics)

            folder_name = f'{user}Analysed_Notes/{file_name.split(".")[0]}'

            for i, batch in enumerate(text_batches):
                lines = batch.split('\n')
                file_name1 = lines[0].strip().replace(" ", "_") + '.txt'
                content = '\n'.join(lines[1:]).strip()
                upload_to_s3(s3_bucket_name, folder_name, file_name1, content)

                # Print uploaded file information
                print(f"File '{file_name1}' uploaded to '{s3_bucket_name}/{folder_name}'")

    # Get the list of files in the "notes_txt" folder
    response = s3.list_objects_v2(Bucket=s3_bucket_name, Prefix=user + 'notes_txt/')

    # Process each file
    for file in response['Contents']:
        file_name = file['Key'].split('/')[-1]
        process_file(file_name)

    return {"message": "NOTES"}
Lines changed: 174 additions & 0 deletions

@@ -0,0 +1,174 @@
import os
from fastapi import APIRouter, UploadFile, File, Form
from pdf2image import convert_from_bytes  # the code below converts from bytes, not a path
from google.cloud import vision
from typing import List
import boto3
from botocore.exceptions import NoCredentialsError
from io import BytesIO
import tempfile
from openvino.inference_engine import IECore

# Set up your S3 credentials and bucket name
s3_access_key = "YOUR_S3_ACCESS_KEY"
s3_secret_access_key = "YOUR_S3_SECRET_ACCESS_KEY"
s3_bucket_name = "YOUR_S3_BUCKET_NAME"

s3 = boto3.client(
    "s3",
    aws_access_key_id=s3_access_key,
    aws_secret_access_key=s3_secret_access_key
)

ie = IECore()

# Create an instance of APIRouter
router = APIRouter()

# Define your OpenVINO model paths
model_xml = "PATH_TO_MODEL_XML"
model_bin = "PATH_TO_MODEL_BIN"

# Load the OpenVINO model
net = ie.read_network(model=model_xml, weights=model_bin)
exec_net = ie.load_network(network=net, device_name="CPU")

# Define the input and output layer names of your model
input_layer_name = "YOUR_INPUT_LAYER_NAME"
output_layer_name = "YOUR_OUTPUT_LAYER_NAME"

# Define any other necessary configuration or parameters

# Rest of the code remains the same...
# ...

@router.post("/filestotext2")
async def NotesToText_handler(user: str = Form(...)):
    user = user + "/"
    prefix = 'notes_pdf/'
    prefix2 = 'pyqs_pdf/'

    # Delete existing files in the output folders
    delete_folder_objects(user + 'images/Notes_images/')
    delete_folder_objects(user + 'notes_txt/')

    convert(prefix, user)
    convert(prefix2, user)

    return {"process completed"}


def convert(prefix, user):
    # List files in the S3 bucket with the specified prefix
    response = s3.list_objects_v2(Bucket=s3_bucket_name, Prefix=user + prefix)

    # Extract the file names from the response
    files = [obj['Key'] for obj in response.get('Contents', [])]

    # Process each file
    for file_name in files:
        file_name = os.path.splitext(os.path.basename(file_name))[0]

        print(f"Converting {file_name}....")

        # Delete existing files in the output folder
        output_folder = f'{user}images/Notes_images/{file_name}'
        delete_folder_objects(output_folder)

        # Download the PDF file from S3
        pdf_object = s3.get_object(Bucket=s3_bucket_name, Key=f'{user}{prefix}{file_name}.pdf')
        pdf_content = pdf_object['Body'].read()

        # Convert the PDF to images and save them in the output folder in S3
        image_paths, noImg = pdf_to_images_from_bytes(pdf_content, output_folder, file_name)
        print(noImg)

        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'Files/client_file_vision.json'
        client = vision.ImageAnnotatorClient()

        # [START vision_python_migration_text_detection]
        detected_text = ""

        for j in range(noImg):
            image_path = f'{output_folder}/page_{j+1}.jpeg'

            # Download the image from S3
            image_object = s3.get_object(Bucket=s3_bucket_name, Key=image_path)
            image_content = image_object['Body'].read()

            # Perform text detection on each page with the Google Cloud Vision API.
            # The first annotation carries the full page text; appending every
            # annotation would duplicate each word.
            response = client.text_detection(image=vision.Image(content=image_content))
            texts = response.text_annotations
            if texts:
                detected_text += texts[0].description + "\n"

        # Save the detected text in a text file
        text_file_path = f'{user}notes_txt/{file_name}.txt'
        upload_text_to_s3(detected_text, text_file_path)

        print(f"{file_name} converted.")


def pdf_to_images_from_bytes(pdf_bytes, output_folder, file_name):
    images = convert_from_bytes(pdf_bytes)
    image_paths = []
    noImg = 0

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Save each image as JPEG in the output folder
    for i, image in enumerate(images):
        image_path = f'{output_folder}/page_{i+1}.jpeg'
        image.save(image_path, 'JPEG')
        image_paths.append(image_path)
        noImg += 1

    # Upload images to S3
    upload_images_to_s3(image_paths, file_name)

    return image_paths, noImg


def upload_images_to_s3(image_paths, file_name):
    for image_path in image_paths:
        with open(image_path, 'rb') as file:
            try:
                s3.upload_fileobj(file, s3_bucket_name, image_path)
            except NoCredentialsError:
                print("S3 credentials not available.")
            except Exception as e:
                print(f"Error uploading image to S3: {str(e)}")
            finally:
                # Remove the local image file
                os.remove(image_path)


def upload_text_to_s3(text, text_file_path):
    try:
        s3.put_object(Body=text, Bucket=s3_bucket_name, Key=text_file_path)
    except NoCredentialsError:
        print("S3 credentials not available.")
    except Exception as e:
        print(f"Error uploading text file to S3: {str(e)}")


def delete_folder_objects(prefix):
    # List objects in the S3 bucket with the specified prefix
    response = s3.list_objects_v2(Bucket=s3_bucket_name, Prefix=prefix)

    # Extract the object keys from the response
    objects = [obj["Key"] for obj in response.get("Contents", [])]

    # Delete each object
    for obj_key in objects:
        s3.delete_object(Bucket=s3_bucket_name, Key=obj_key)

@router.get("/")
async def hello():
    return {"Byte 404 rocks"}
