Skip to content

Commit 2e3dc06

Browse files
committed
chunker
1 parent 33b7343 commit 2e3dc06

File tree

7 files changed

+92
-16
lines changed

7 files changed

+92
-16
lines changed

Backend/NotesChunker.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,22 @@
1-
from fastapi import APIRouter
21
import tensorflow_hub as hub
3-
import tensorflow_text
42
from sklearn.cluster import KMeans
53
import numpy as np
6-
7-
8-
9-
app = APIRouter()
10-
11-
# Load the USE model
12-
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-multilingual/3")
4+
from tqdm import tqdm
135

146
# Preprocessing functions
def preprocess_text(text):
    """Split raw input on newlines; each line is treated as one sentence."""
    return text.split('\n')
1810

19-
# API route for extracting topic-wise chunks
20-
@app.post("/extract_chunks")
21-
def extract_chunks(text: str):
11+
def extract_chunks(text):
2212
# Preprocess the input text
2313
sentences = preprocess_text(text)
2414

15+
# Show progress bar while loading the model
16+
with tqdm(total=1, desc="Loading model") as pbar:
17+
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")
18+
pbar.update(1)
19+
2520
# Generate sentence embeddings
2621
sentence_embeddings = embed(sentences)
2722

@@ -47,4 +42,9 @@ def extract_chunks(text: str):
4742
chunk_sentences = [sentences[i] for i in range(len(sentences)) if kmeans.labels_[i] == cluster_index]
4843
chunks.append({"topic": f"Topic {cluster_index+1}", "subsections": chunk_sentences})
4944

50-
return chunks
45+
return chunks
46+
47+
# Example usage
48+
text = "This is an example text. It contains multiple sentences.\nEach sentence represents a subsection."
49+
result = extract_chunks(text)
50+
print(result)
3 Bytes
Binary file not shown.

Backend/documentai.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import nltk
2+
from sklearn.feature_extraction.text import TfidfVectorizer
3+
from sklearn.decomposition import TruncatedSVD
4+
5+
# Step 1: Preprocessing
def preprocess_notes(notes):
    """Normalize each note for vectorization.

    Args:
        notes: iterable of note strings.

    Returns:
        A new list with each note lowercased.

    NOTE(review): only lowercasing is implemented; the punctuation removal
    mentioned in the original comment was never added — confirm whether it
    is still wanted before relying on it.
    """
    return [note.lower() for note in notes]
14+
15+
# Step 2: Document-Term Matrix
def create_document_term_matrix(preprocessed_notes):
    """Build and return a TF-IDF document-term matrix for the notes."""
    # The fitted vectorizer itself is not kept; only the matrix is returned.
    return TfidfVectorizer().fit_transform(preprocessed_notes)
20+
21+
# Step 3: Apply LSA
def apply_lsa(X, number_of_topics):
    """Project the document-term matrix onto `number_of_topics` LSA components."""
    svd = TruncatedSVD(n_components=number_of_topics)
    return svd.fit_transform(X)
26+
27+
# Step 4: Topic Extraction
def extract_topics(lsa_representation, notes):
    """Group notes by their dominant LSA topic.

    Args:
        lsa_representation: 2-D array-like where row i holds the topic
            weights for notes[i] (anything whose rows support .argmax()).
        notes: sequence of note strings, parallel to the rows above.

    Returns:
        Dict mapping topic index -> list of notes assigned to that topic.
    """
    topic_wise_notes = {}
    for i, note in enumerate(notes):
        # Dominant topic = index of the largest weight in this note's row.
        topic = lsa_representation[i].argmax()
        # setdefault replaces the membership-test-then-insert pattern.
        topic_wise_notes.setdefault(topic, []).append(note)
    return topic_wise_notes
36+
37+
# Main code
def main():
    """Run the full LSA topic-extraction pipeline on a sample list of notes."""
    # Input: List of notes
    your_notes_list = [
        "Note 1",
        "Note 2",
        "Note 3",
        # Add more notes as needed
    ]

    # Set the number of topics for LSA
    number_of_topics = 3

    # Step 1: Preprocessing
    preprocessed_notes = preprocess_notes(your_notes_list)

    # Step 2: Document-Term Matrix
    X = create_document_term_matrix(preprocessed_notes)

    # Step 3: Apply LSA
    lsa_representation = apply_lsa(X, number_of_topics)

    # Step 4: Topic Extraction
    topic_wise_notes = extract_topics(lsa_representation, your_notes_list)

    # Print the topic-wise notes
    for topic, notes in topic_wise_notes.items():
        print(f"Topic {topic}:")
        for note in notes:
            print(note)
        print()


# Guard the entry point so importing this module no longer runs the
# pipeline as a side effect (the original called main() unconditionally).
if __name__ == "__main__":
    main()

__pycache__/app.cpython-310.pyc

-66 Bytes
Binary file not shown.

app.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88
#from Backend.Notes_Analyser import router as api4_router
99
#from Backend.Narrator import router as api5_router
10-
from Backend.NotesChunker import app as chunker
11-
from Backend.NotesToText import router as notestotxt
10+
from Backend.NotesChunker import router as chunker
11+
#from Backend.NotesToText import router as notestotxt
1212

1313
# import other API routers as needed
1414

@@ -33,7 +33,7 @@
3333
#app.include_router(sorter)
3434
#app.include_router(api4_router)
3535
app.include_router(chunker)
36-
app.include_router(notestotxt)
36+
#app.include_router(notestotxt)
3737

3838
# include other API routers as needed
3939

model

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<!doctype html><html><head><title>TensorFlow Hub</title><meta name="google-site-verification" content="qqy8ICUGzfuvtOqpeIPRRHh105pRztoKQMm4wgowXGg"/><link rel="icon" href="//www.gstatic.com/aihub/tfhub_logo_3.png"><link rel="stylesheet" href="https://www.gstatic.com/_/tfhubdev/_/ss/k=tfhubdev.h.oe6hlFNL_vk.L.X.O/d=0/rs=AJFuRJ-fCeC1XBTBUgsFAruZh0T1mqQs2w"><!-- Integrate Glue's Carousel --><link href="//www.gstatic.com/glue/v21_0/glue.min.css" rel="stylesheet"><script src="//www.gstatic.com/glue/v21_0/glue-detect.min.js"></script><base href="/"></head><body><app-root>Loading...</app-root><script id="base-js" src="https://www.gstatic.com/_/tfhubdev/_/js/k=tfhubdev.h.en_US.bgI9DrEVhiQ.O/d=1/rs=AJFuRJ_J_qAVaLGqSi5q1oSOLnDm1Ze6ug/m=b" async></script><script>
2+
window.ga=window.ga||function(){(ga.q=ga.q||[]).push(arguments)};ga.l=+new Date;
3+
ga('create', 'UA-121310548-2', 'auto'); window.analyticsId = 'UA-121310548-2';</script><script async src="https://www.google-analytics.com/analytics.js"></script><!-- Integrate Glue's Carousel --><script src="https://www.gstatic.com/external_hosted/hammerjs/v2_0_2/hammer.min.js"></script><script src="//www.gstatic.com/glue/v21_0/glue-vanilla.min.js"></script></body></html>

model.pb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<!doctype html><html><head><title>TensorFlow Hub</title><meta name="google-site-verification" content="qqy8ICUGzfuvtOqpeIPRRHh105pRztoKQMm4wgowXGg"/><link rel="icon" href="//www.gstatic.com/aihub/tfhub_logo_3.png"><link rel="stylesheet" href="https://www.gstatic.com/_/tfhubdev/_/ss/k=tfhubdev.h.oe6hlFNL_vk.L.X.O/d=0/rs=AJFuRJ-fCeC1XBTBUgsFAruZh0T1mqQs2w"><!-- Integrate Glue's Carousel --><link href="//www.gstatic.com/glue/v21_0/glue.min.css" rel="stylesheet"><script src="//www.gstatic.com/glue/v21_0/glue-detect.min.js"></script><base href="/"></head><body><app-root>Loading...</app-root><script id="base-js" src="https://www.gstatic.com/_/tfhubdev/_/js/k=tfhubdev.h.en_US.bgI9DrEVhiQ.O/d=1/rs=AJFuRJ_J_qAVaLGqSi5q1oSOLnDm1Ze6ug/m=b" async></script><script>
2+
window.ga=window.ga||function(){(ga.q=ga.q||[]).push(arguments)};ga.l=+new Date;
3+
ga('create', 'UA-121310548-2', 'auto'); window.analyticsId = 'UA-121310548-2';</script><script async src="https://www.google-analytics.com/analytics.js"></script><!-- Integrate Glue's Carousel --><script src="https://www.gstatic.com/external_hosted/hammerjs/v2_0_2/hammer.min.js"></script><script src="//www.gstatic.com/glue/v21_0/glue-vanilla.min.js"></script></body></html>

0 commit comments

Comments
 (0)