Skip to content

Commit ed1b905

Browse files
committed
2 parents 89d5b78 + d245916 commit ed1b905

File tree

9 files changed

+98
-17
lines changed

9 files changed

+98
-17
lines changed

Backend/Final_LatestSorter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def process_file(file_name,user1):
6363
# Read file from S3
6464
print(user1)
6565
response = s3.get_object(Bucket='learnmateai', Key=user1+'pyqs_txt/' + file_name)
66-
file_content = response['Body'].read().decode('utf-16-le')
66+
file_content = response['Body'].read().decode('utf-8')
6767

6868
# Split file content into batches (adjust batch size as needed)
6969
batch_size = 30000
File renamed without changes.
File renamed without changes.
-11 Bytes
Binary file not shown.
4.63 KB
Binary file not shown.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
2+
{'best_video_url': 'https://www.youtube.com/watch?v=GwIo3gDZCVQ', 'elapsed_time': 58.66309332847595}
3+
4+
Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)
5+
{'best_video_url': 'https://www.youtube.com/watch?v=GwIo3gDZCVQ', 'elapsed_time': 54.04244136810303}
6+
7+

Backend/benchmark_python/video.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
from sklearnex import unpatch_sklearn
2+
unpatch_sklearn()
3+
from sklearn.metrics.pairwise import cosine_similarity
4+
from transformers import pipeline
5+
import requests
6+
import json
7+
from youtube_transcript_api import YouTubeTranscriptApi
8+
from sentence_transformers import SentenceTransformer
9+
import torch
10+
import time
11+
12+
# Load the Sentence-Transformers embedding model (bert-base-nli-mean-tokens)
13+
model = SentenceTransformer('bert-base-nli-mean-tokens')
14+
15+
# YouTube API parameters
16+
# The YouTube Data API key is read from the YOUTUBE_API_KEY environment
# variable so that no credential is stored in source control.
# NOTE(review): a literal key was previously committed on this line; it should
# be treated as compromised and revoked in the Google Cloud console.
import os

API_KEY = os.environ.get("YOUTUBE_API_KEY", "")
17+
MAX_RESULTS = 50 # Maximum number of search results to retrieve
18+
19+
# Search for videos using the YouTube API
20+
def search_videos(query):
    """Search YouTube for videos matching ``query``.

    Sends a single request to the YouTube Data API v3 ``search`` endpoint,
    restricted to ``type=video`` and capped at ``MAX_RESULTS`` results.

    Args:
        query: Free-text search string.

    Returns:
        A ``(video_ids, video_titles)`` tuple of two parallel lists with one
        entry per search result. Both lists are empty when the response
        contains no ``items``.

    Raises:
        requests.HTTPError: If the API responds with an error status code.
    """
    # Passing the values through ``params`` lets requests URL-encode them, so
    # a query containing spaces, ``&`` or ``#`` can no longer corrupt the URL.
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/search",
        params={
            "key": API_KEY,
            "part": "snippet",
            "type": "video",
            "maxResults": MAX_RESULTS,
            "q": query,
        },
        timeout=30,  # without a timeout a stalled connection blocks forever
    )
    # Surface quota/auth failures here instead of as a KeyError on 'items'.
    response.raise_for_status()

    items = response.json().get("items", [])
    video_ids = [item["id"]["videoId"] for item in items]
    video_titles = [item["snippet"]["title"] for item in items]
    return video_ids, video_titles
27+
28+
# Retrieve video transcripts using the YouTube Transcript API
29+
def get_video_transcripts(video_ids):
    """Fetch the transcript text for each video ID.

    Args:
        video_ids: Iterable of YouTube video IDs.

    Returns:
        A list with one string per input ID, in the same order: the
        transcript's ``text`` segments joined by single spaces, or ``''`` when
        the transcript could not be retrieved or parsed.
    """
    transcripts = []
    for video_id in video_ids:
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            text = ' '.join(line['text'] for line in transcript)
        except Exception:
            # Best effort: a failed lookup must not abort the whole batch.
            # The empty placeholder keeps the result aligned with video_ids.
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
            # SystemExit propagate.
            text = ''
        transcripts.append(text)
    return transcripts
39+
40+
def get_best_video(input_text: str):
    """Find the YouTube video whose transcript best matches ``input_text``.

    Searches YouTube for ``input_text``, fetches the transcript of every
    result, embeds the query and the transcripts with the module-level
    ``model`` and picks the video with the highest cosine similarity.

    Args:
        input_text: Text used both as the search query and as the reference
            for the similarity comparison.

    Returns:
        A dict with ``"best_video_url"`` (watch URL of the best match) and
        ``"elapsed_time"`` (wall-clock seconds spent in this call).

    Raises:
        ValueError: If the search returns no videos.
    """
    start_time = time.time()  # Start measuring time

    # Search for videos and retrieve video transcripts
    video_ids, _video_titles = search_videos(input_text)
    if not video_ids:
        raise ValueError(f"No YouTube search results for {input_text!r}")
    video_transcripts = get_video_transcripts(video_ids)

    # Encode the query and the transcripts. The default NumPy output is used
    # because scikit-learn's cosine_similarity operates on arrays, not on
    # torch tensors.
    input_embedding = model.encode([input_text])
    video_embeddings = model.encode(video_transcripts)

    # cosine_similarity returns a (1, n_videos) matrix. Row 0 holds the score
    # of each video against the single query. Zipping the 2-D matrix directly
    # with video_ids would pair only the first video with the whole row and
    # therefore always select the first search result.
    similarity_scores = cosine_similarity(input_embedding, video_embeddings)[0]

    # Index of the highest score; on ties the earliest search result wins.
    best_index = max(range(len(video_ids)), key=lambda i: similarity_scores[i])
    best_video_id = video_ids[best_index]

    # Construct the YouTube video URL
    best_video_url = f"https://www.youtube.com/watch?v={best_video_id}"

    elapsed_time = time.time() - start_time  # Stop measuring time

    return {"best_video_url": best_video_url, "elapsed_time": elapsed_time}
69+
70+
71+
# Example usage:
72+
# These statements sit at module top level, so they run whenever this file is
# executed or imported: they call get_best_video once and print its result.
input_text = "machine learning tutorial"
73+
best_video = get_best_video(input_text)
74+
print(best_video)

__pycache__/app.cpython-310.pyc

-399 Bytes
Binary file not shown.

app.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@
1212
#from Backend.Notes_Analyser import router as api4_router
1313
#from Backend.Narrator import router as api5_router
1414
#from Backend.NotesChunker import app as chunker
15-
#from Backend.Final_NotesToText import router as notestotxt
16-
from Backend.Final_Processor import app as processor
17-
from Backend.Final_NotesChunker import app as chunker
18-
from Backend.Final_LatestSorter import app as sorter
19-
from Backend.Final_Sections_topics_json import app as cardmaker
20-
from Backend.Final_Notes_gen import app as notesgen
21-
from Backend.Final_Questionare_Creater import app as mcq_gen
22-
from Backend.Final_email_test import router as email_sender
15+
from Backend.Final_NotesToText import router as notestotxt
16+
#from Backend.Final_Processor import app as processor
17+
#from Backend.Final_NotesChunker import app as chunker
18+
#from Backend.Final_LatestSorter import app as sorter
19+
#from Backend.Final_Sections_topics_json import app as cardmaker
20+
#from Backend.Final_Notes_gen import app as notesgen
21+
#from Backend.Final_Questionare_Creater import app as mcq_gen
22+
#from Backend.Final_email_test import router as email_sender
2323
#from Backend.SortedPQYsender import app as pyqsender
2424
#from Backend.Perfect_video import app as videofinder
2525

@@ -44,14 +44,14 @@
4444
# Mount the API routers
4545

4646

47-
#app.include_router(notestotxt)
48-
app.include_router(email_sender)
49-
app.include_router(processor)
50-
app.include_router(chunker)
51-
app.include_router(sorter)
52-
app.include_router(cardmaker)
53-
app.include_router(notesgen)
54-
app.include_router(mcq_gen)
47+
app.include_router(notestotxt)
48+
#app.include_router(email_sender)
49+
#app.include_router(processor)
50+
#app.include_router(chunker)
51+
#app.include_router(sorter)
52+
#app.include_router(cardmaker)
53+
#app.include_router(notesgen)
54+
#app.include_router(mcq_gen)
5555
#app.include_router(chunker)
5656
#app.include_router(pyqsender)
5757

0 commit comments

Comments
 (0)