Skip to content

Commit 863435f

Browse files
Revert to main versions: llm.py, main.py, request_models.py
1 parent 6195940 commit 863435f

File tree

3 files changed

+75
-163
lines changed

3 files changed

+75
-163
lines changed

genAi/llm.py

Lines changed: 53 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -12,112 +12,76 @@
1212

1313

1414
class StudyLLM:
15-
# Class-level attributes for lazy initialization
16-
_chat_llm = None
17-
_generation_llm = None
18-
19-
@classmethod
20-
def _get_chat_llm(cls):
21-
"""Lazy initialization of chat LLM"""
22-
if cls._chat_llm is None:
23-
cls._chat_llm = ChatOpenAI(
24-
model="llama3.3:latest",
25-
temperature=0.5,
26-
api_key=os.getenv("OPEN_WEBUI_API_KEY_CHAT"),
27-
base_url="https://gpu.aet.cit.tum.de/api/",
28-
)
29-
return cls._chat_llm
30-
31-
@classmethod
32-
def _get_generation_llm(cls):
33-
"""Lazy initialization of generation LLM"""
34-
if cls._generation_llm is None:
35-
cls._generation_llm = ChatOpenAI(
36-
model="llama3.3:latest",
37-
temperature=0.5,
38-
api_key=os.getenv("OPEN_WEBUI_API_KEY_GEN"),
39-
base_url="https://gpu.aet.cit.tum.de/api/",
40-
)
41-
return cls._generation_llm
42-
43-
@property
44-
def chat_llm(self):
45-
"""Get the chat LLM instance"""
46-
return self._get_chat_llm()
47-
48-
@chat_llm.setter
49-
def chat_llm(self, value):
50-
"""Set the chat LLM instance (for testing)"""
51-
StudyLLM._chat_llm = value
52-
53-
@chat_llm.deleter
54-
def chat_llm(self):
55-
"""Reset the chat LLM instance (for testing)"""
56-
StudyLLM._chat_llm = None
57-
58-
@property
59-
def generation_llm(self):
60-
"""Get the generation LLM instance"""
61-
return self._get_generation_llm()
62-
63-
@generation_llm.setter
64-
def generation_llm(self, value):
65-
"""Set the generation LLM instance (for testing)"""
66-
StudyLLM._generation_llm = value
67-
68-
@generation_llm.deleter
69-
def generation_llm(self):
70-
"""Reset the generation LLM instance (for testing)"""
71-
StudyLLM._generation_llm = None
72-
15+
# for chat
16+
chat_llm = ChatOpenAI(
17+
model="llama3.3:latest",
18+
temperature=0.5,
19+
api_key=os.getenv("OPEN_WEBUI_API_KEY_CHAT"),
20+
base_url="https://gpu.aet.cit.tum.de/api/"
21+
)
22+
23+
# For summaries, quizzes, flashcards
24+
generation_llm = ChatOpenAI(
25+
model="llama3.3:latest",
26+
temperature=0.5,
27+
api_key=os.getenv("OPEN_WEBUI_API_KEY_GEN"),
28+
base_url="https://gpu.aet.cit.tum.de/api/"
29+
)
30+
7331
def __init__(self, doc_path: str):
74-
base_system_template = (
75-
"You are an expert on the information in the context given below.\n"
76-
"Use the context as your primary knowledge source. If you can't fulfill your task given the context, just say that.\n"
77-
"context: {context}\n"
78-
"Your task is {task}"
79-
)
80-
self.base_prompt_template = ChatPromptTemplate.from_messages(
81-
[("system", base_system_template), ("human", "{input}")]
82-
)
83-
try:
32+
base_system_template = ("You are an expert on the information in the context given below.\n"
33+
"Use the context as your primary knowledge source. If you can't fulfill your task given the context, just say that.\n"
34+
"context: {context}\n"
35+
"Your task is {task}"
36+
)
37+
self.base_prompt_template = ChatPromptTemplate.from_messages([
38+
('system', base_system_template),
39+
('human', '{input}')
40+
])
41+
try:
8442
self.rag_helper = RAGHelper(doc_path)
8543
except Exception as e:
8644
raise ValueError(f"Error initializing RAGHelper: {e}")
8745

46+
8847
async def prompt(self, prompt: str) -> str:
8948
"""
9049
Call the LLM with a given prompt.
91-
50+
9251
Args:
9352
prompt (str): The input prompt for the LLM.
94-
53+
9554
Returns:
9655
str: The response from the LLM.
9756
"""
98-
task = (
57+
task = (
9958
"To answer questions based on your context."
10059
"If you're asked a question that does not relate to your context, do not answer it - instead, answer by saying you're only familiar with <the topic in your context>.\n"
101-
)
102-
60+
)
61+
10362
context = self.rag_helper.retrieve(prompt, top_k=5)
10463
chain = self.base_prompt_template | self.chat_llm
105-
response = await chain.ainvoke(
106-
{"context": context, "task": task, "input": prompt}
107-
)
108-
64+
response = await chain.ainvoke({
65+
'context': context,
66+
'task':task,
67+
'input':prompt
68+
})
69+
10970
return response.content
11071

11172
async def summarize(self):
11273
"""
11374
Summarize the given document using the LLM.
114-
75+
11576
Returns:
11677
str: The summary of the document.
11778
"""
118-
79+
11980
map_prompt = PromptTemplate.from_template(
120-
(f"Write a medium length summary of the following:\n\n" "{text}")
81+
(
82+
f"Write a medium length summary of the following:\n\n"
83+
"{text}"
84+
)
12185
)
12286

12387
combine_prompt = PromptTemplate.from_template(
@@ -135,42 +99,37 @@ async def summarize(self):
13599
self.generation_llm,
136100
chain_type="map_reduce",
137101
map_prompt=map_prompt,
138-
combine_prompt=combine_prompt,
139-
)
140-
141-
result = await chain.ainvoke(
142-
{"input_documents": self.rag_helper.summary_chunks}
102+
combine_prompt=combine_prompt
143103
)
144104

105+
result = await chain.ainvoke({"input_documents": self.rag_helper.summary_chunks})
106+
145107
return result["output_text"]
146-
108+
147109
async def generate_flashcards(self):
148110
"""
149111
Generate flashcards from the document using the LLM.
150-
112+
151113
Returns:
152114
list: A list of flashcard objects.
153115
"""
154116
flashcard_chain = FlashcardChain(self.generation_llm)
155117
cards = await flashcard_chain.invoke(self.rag_helper.summary_chunks)
156118
return cards
157-
119+
158120
async def generate_quiz(self):
159121
"""
160122
Generate a quiz from the document using the LLM.
161-
123+
162124
Returns:
163125
list: A quiz object.
164126
"""
165127
quiz_chain = QuizChain(self.generation_llm)
166128
quiz = await quiz_chain.invoke(self.rag_helper.summary_chunks)
167129
return quiz
168-
130+
169131
def cleanup(self):
170132
"""
171133
Cleanup resources used by the LLM.
172134
"""
173-
try:
174-
self.rag_helper.cleanup()
175-
except Exception as e:
176-
print(f"Error during RAGHelper cleanup: {e}")
135+
self.rag_helper.cleanup()

genAi/main.py

Lines changed: 17 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,7 @@
33
from fastapi import FastAPI
44
from fastapi.responses import JSONResponse
55
from helpers import save_document
6-
from request_models import (
7-
CreateSessionRequest,
8-
PromptRequest,
9-
SummaryRequest,
10-
QuizRequest,
11-
FlashcardRequest,
12-
ProcessRequest,
13-
)
6+
from request_models import CreateSessionRequest, PromptRequest, SummaryRequest, QuizRequest, FlashcardRequest
147
from llm import StudyLLM
158
from prometheus_fastapi_instrumentator import Instrumentator
169

@@ -21,15 +14,13 @@
2114

2215
llm_instances: dict[str, StudyLLM] = {}
2316

24-
2517
@asynccontextmanager
2618
async def lifespan(_):
2719
yield
2820
# Shutdown: cleanup
2921
for llm in llm_instances.values():
3022
llm.cleanup()
3123

32-
3324
app = FastAPI(
3425
title="tutor",
3526
openapi_tags=[
@@ -47,20 +38,19 @@ async def lifespan(_):
4738
},
4839
{"name": "Ingestion", "description": "Endpoints to start ingestion processes."},
4940
],
50-
lifespan=lifespan,
41+
lifespan=lifespan
5142
)
5243

5344
Instrumentator(
54-
excluded_handlers=["/metrics"],
45+
excluded_handlers=['/metrics'],
5546
should_group_status_codes=False,
56-
should_instrument_requests_inprogress=True,
57-
).instrument(app).expose(app)
47+
should_instrument_requests_inprogress=True
48+
).instrument(app).expose(app)
5849

5950

6051
# llm_instances["dummy"] = StudyLLM("./documents/example/W07_Microservices_and_Scalable_Architectures.pdf") # TODO: remove
6152
# llm_instances["dummy2"] = StudyLLM("./documents/example/dummy_knowledge.txt") # TODO: remove
6253

63-
6454
# Auxiliary Endpoints
6555
@app.get("/health")
6656
async def health_check():
@@ -81,10 +71,8 @@ async def load_session(data: CreateSessionRequest):
8171
if data.session_id in llm_instances:
8272
logger.info(f"Session {data.session_id} already exists")
8373
return {"message": "Session already loaded."}
84-
85-
logger.info(
86-
f"Creating new session {data.session_id} for document {data.document_name}"
87-
)
74+
75+
logger.info(f"Creating new session {data.session_id} for document {data.document_name}")
8876
doc_name = f"{data.session_id}_{data.document_name}"
8977
path = save_document(doc_name, data.document_base64)
9078
llm_instances[data.session_id] = StudyLLM(path)
@@ -105,10 +93,8 @@ async def receive_prompt(data: PromptRequest):
10593
if data.session_id not in llm_instances:
10694
error_msg = f"Session {data.session_id} not found. Please ensure the document was processed successfully."
10795
logger.error(error_msg)
108-
return JSONResponse(
109-
status_code=404, content={"response": f"ERROR: {error_msg}"}
110-
)
111-
96+
return JSONResponse(status_code=404, content={"response": f"ERROR: {error_msg}"})
97+
11298
logger.info(f"Processing chat request for session {data.session_id}")
11399
response = await llm_instances[data.session_id].prompt(data.message)
114100
return {"response": response}
@@ -117,7 +103,6 @@ async def receive_prompt(data: PromptRequest):
117103
logger.error(error_msg)
118104
return {"response": f"ERROR: {error_msg}"}
119105

120-
121106
@app.post("/summary")
122107
async def generate_summary(data: SummaryRequest):
123108
"""
@@ -128,7 +113,7 @@ async def generate_summary(data: SummaryRequest):
128113
error_msg = f"Session {data.session_id} not found. Please ensure the document was processed successfully."
129114
logger.error(error_msg)
130115
return {"response": f"ERROR: {error_msg}"}
131-
116+
132117
logger.info(f"Generating summary for session {data.session_id}")
133118
response = await llm_instances[data.session_id].summarize()
134119
return {"response": response}
@@ -137,7 +122,6 @@ async def generate_summary(data: SummaryRequest):
137122
logger.error(error_msg)
138123
return {"response": f"ERROR: {error_msg}"}
139124

140-
141125
@app.post("/flashcard")
142126
async def generate_flashcards(data: FlashcardRequest):
143127
"""
@@ -148,19 +132,16 @@ async def generate_flashcards(data: FlashcardRequest):
148132
error_msg = f"Session {data.session_id} not found. Please ensure the document was processed successfully."
149133
logger.error(error_msg)
150134
return {"response": {"flashcards": [], "error": error_msg}}
151-
135+
152136
logger.info(f"Generating flashcards for session {data.session_id}")
153137
response = await llm_instances[data.session_id].generate_flashcards()
154138
logger.info(f"Flashcards generated successfully for session {data.session_id}")
155139
return {"response": response}
156140
except Exception as e:
157-
error_msg = (
158-
f"Flashcard generation error for session {data.session_id}: {str(e)}"
159-
)
141+
error_msg = f"Flashcard generation error for session {data.session_id}: {str(e)}"
160142
logger.error(error_msg)
161143
return {"response": {"flashcards": [], "error": error_msg}}
162144

163-
164145
@app.post("/quiz")
165146
async def generate_quiz(data: QuizRequest):
166147
"""
@@ -171,7 +152,7 @@ async def generate_quiz(data: QuizRequest):
171152
error_msg = f"Session {data.session_id} not found. Please ensure the document was processed successfully."
172153
logger.error(error_msg)
173154
return {"response": {"questions": [], "error": error_msg}}
174-
155+
175156
logger.info(f"Generating quiz for session {data.session_id}")
176157
response = await llm_instances[data.session_id].generate_quiz()
177158
logger.info(f"Quiz generated successfully for session {data.session_id}")
@@ -181,9 +162,8 @@ async def generate_quiz(data: QuizRequest):
181162
logger.error(error_msg)
182163
return {"response": {"questions": [], "error": error_msg}}
183164

184-
185165
@app.post("/process")
186-
async def process_document(data: ProcessRequest):
166+
async def process_document(data: SummaryRequest):
187167
"""Compatibility endpoint for Kotlin genai-service (/process).
188168
It creates a session (if not present) and immediately returns QUEUED.
189169
(Actual processing e.g. summary generation can be triggered asynchronously.)"""
@@ -199,13 +179,13 @@ async def process_document(data: ProcessRequest):
199179
"requestId": session_id,
200180
"status": "QUEUED",
201181
"message": "Document queued for processing",
202-
"estimatedTime": None,
182+
"estimatedTime": None
203183
}
204184
except Exception as e:
205185
logger.error(f"/process error: {str(e)}")
206186
return {
207187
"requestId": None,
208188
"status": "FAILED",
209189
"message": f"Failed to process document: {str(e)}",
210-
"estimatedTime": None,
211-
}
190+
"estimatedTime": None
191+
}

0 commit comments

Comments (0)