Improve llm architecture + improve ux in client

edkaya · edkaya · commit 7ed0ac7bfc6d · 2025-07-10T14:03:11.000+02:00
diff --git a/client/package-lock.json b/client/package-lock.json
diff --git a/client/package.json b/client/package.json
@@ -17,6 +17,7 @@
     "lucide-react": "^0.344.0",
     "react": "^18.3.1",
     "react-dom": "^18.3.1",
+    "react-markdown": "^10.1.0",
     "react-oidc-context": "^3.3.0",
     "react-router-dom": "^6.22.3"
   },
diff --git a/client/src/components/MessageItem.tsx b/client/src/components/MessageItem.tsx
@@ -1,6 +1,7 @@
 import React from "react";
 import { formatTimestamp } from "../utils/helpers";
 import { User, Bot } from "lucide-react";
+import Markdown from "react-markdown";
 
 interface MessageItemProps {
   message: Message;
@@ -34,12 +35,10 @@ const MessageItem: React.FC<MessageItemProps> = ({ message }) => {
               {formatTimestamp(message.timestamp)}
             </span>
           </div>
-          <div className="prose dark:prose-invert prose-sm sm:prose-base max-w-none">
-            {message.content.split("\n").map((paragraph, i) => (
-              <p key={i} className="mb-2 text-gray-800 dark:text-gray-200">
-                {paragraph}
-              </p>
-            ))}
+          <div className="prose dark:prose-invert prose-sm sm:prose-base max-w-none text-gray-800 dark:text-gray-200">
+            <Markdown>
+              {message.content}
+            </Markdown>
           </div>
         </div>
       </div>
diff --git a/client/src/components/UploadRecipe.tsx b/client/src/components/UploadRecipe.tsx
@@ -4,7 +4,7 @@ import { useAuth } from "../hooks/useAuth";
 
 const UploadRecipe: React.FC = () => {
   const [status, setStatus] = useState<
-    "idle" | "uploading" | "success" | "error"
+    "idle" | "uploading" | "success" | "already_uploaded" | "error"
   >("idle");
 
   const { user } = useAuth();
@@ -27,7 +27,15 @@ const UploadRecipe: React.FC = () => {
       });
 
       if (!response.ok) throw new Error("Upload failed");
-      setStatus("success");
+      
+      const data = await response.json();
+      const message = data.message || "";
+      
+      if (message.includes("already uploaded")) {
+        setStatus("already_uploaded");
+      } else {
+        setStatus("success");
+      }
 
       setTimeout(() => setStatus("idle"), 3000);
     } catch (err) {
@@ -62,6 +70,13 @@ const UploadRecipe: React.FC = () => {
         </div>
       )}
 
+      {status === "already_uploaded" && (
+        <div className="flex items-center justify-center gap-2 text-sm text-yellow-600">
+          <CheckCircle className="w-5 h-5" />
+          File Already Uploaded!
+        </div>
+      )}
+
       {status === "error" && (
         <div className="flex items-center justify-center gap-2 text-sm text-red-600">
           <XCircle className="w-5 h-5" />
diff --git a/genai/routes/routes.py b/genai/routes/routes.py
@@ -9,14 +9,17 @@
 from fastapi.responses import JSONResponse
 import os
 
-# from config import Config
 from logger import logger
 from time import perf_counter
 
-from vector_database.qdrant_vdb import QdrantVDB
-from rag.ingestion_pipeline import IngestionPipeline
-from rag.llm.chat_model import ChatModel
-# from rag.llm.cloud_chat_model import CloudLLM
+from service.llm_service import generate_response
+
+from service.qdrant_service import (
+    file_already_uploaded,
+    collection_already_exists,
+    ingest_file
+)
+
 from service.rag_service import (
     retrieve_similar_docs,
     prepare_prompt,
@@ -37,43 +40,6 @@
 
 router = APIRouter()
 
-# Set vector database
-qdrant = QdrantVDB()
-
-# Set chat model for local llm models
-# Make calls to local models in openwebui hosted by the university
-llm = ChatModel(model_name="llama3.3:latest")
-
-# Alternatively, we can switch to a chat model based on cloud models as well
-# If you want to use other cloud models, please adjust model_name,
-# model_provider, and api key
-# accordingly
-
-# Examples:
-# llm_cloud_anthropic = CloudLLM(
-#     model_name="claude-3-sonnet-20240229",
-#     model_provider="anthropic",
-#     api_key=Config.api_key_anthropic,
-# )
-# llm_cloud_openai = CloudLLM(
-#     model_name="gpt-4-1106-preview",
-#     model_provider="openai",
-#     api_key=Config.api_key_openai,
-# )
-#
-# llm_cloud_mistral = CloudLLM(
-#     model_name="mistral-medium",
-#     model_provider="mistral",
-#     api_key=Config.api_key_mistral,
-# )
-
-# If no parameters are provided, the default cloud model will be openai.
-# If a cloud model is wanted, please remove the comment
-# for package import "CloudLLM"
-
-# Example:
-# llm = CloudLLM() # same as llm_cloud_openai
-
 
 @router.post("/upload")
 async def upload_file(
@@ -102,26 +68,15 @@ async def upload_file(
             buffer.write(await file.read())
 
         collection_name = f"recipes_{current_user.user_id}"
-        if (
-            qdrant.client.collection_exists(collection_name)
-            and qdrant.collection_contains_file(
-                qdrant.client,
-                collection_name,
-                filename
-            )
-        ):
+        if file_already_uploaded(collection_name, filename):
             logger.info(
                 "File already exists in qdrant for user %s",
                 current_user.username
                 )
             return {"message": f"File '{filename}' already uploaded."}
 
-        vector_store = qdrant.create_and_get_vector_storage(collection_name)
-        ingestion_pipeline = IngestionPipeline(vector_store=vector_store)
-        ingestion_pipeline.ingest(file_path, filename)
-
+        ingest_file(collection_name, file_path, filename)
         file_upload_successfully_counter.inc()
-
         return {"message": "File processed successfully."}
 
     except Exception as e:
@@ -163,35 +118,32 @@ async def generate(request: Request):
 
     try:
         retrieved_docs = ""
-        if qdrant.client.collection_exists(collection_name):
-            vector_store = qdrant.create_and_get_vector_storage(
-                collection_name
-            )
+        if collection_already_exists(collection_name):
             logger.info(
-                "Vector store is created for the collection %s for user_id %s",
+                "Collection %s already exists for user_id %s",
                 collection_name,
                 user_id
             )
-            retrieved_docs = retrieve_similar_docs(vector_store, query)
+
+            retrieved_docs = retrieve_similar_docs(collection_name, query)
             logger.info("Similar docs retrieved from the vector store")
 
         messages = process_raw_messages(messages_raw)
         logger.info("Raw messages are processed for prompt preparation")
 
         prompt = prepare_prompt(
-            llm.get_system_prompt(),
             query,
             retrieved_docs,
             messages
         )
         logger.info("Prompt is prepared")
 
-        response = llm.invoke(prompt)
+        response = generate_response(prompt)
         logger.info("Response is generated")
 
         generation_successfully_counter.inc()
 
-        return JSONResponse(content={"response": response.content})
+        return JSONResponse(content={"response": response})
 
     except Exception as e:
         logger.error("Generation is failed. Error: %s", str(e), exc_info=True)
diff --git a/genai/service/llm_service.py b/genai/service/llm_service.py
@@ -0,0 +1,53 @@
+# from config import Config
+# from rag.llm.cloud_chat_model import CloudLLM
+from rag.llm.chat_model import ChatModel
+
+from langchain_core.prompt_values import PromptValue
+
+
+# Chat model for locally hosted LLMs.
+# Requests go to the models served by the university-hosted Open WebUI.
+llm = ChatModel(model_name="llama3.3:latest")
+
+# Alternatively, a cloud-hosted chat model can be used instead of the
+# local one.
+# To use a particular cloud model, adjust model_name, model_provider,
+# and the API key accordingly (see the examples below).
+
+# Examples:
+# llm_cloud_anthropic = CloudLLM(
+#     model_name="claude-3-sonnet-20240229",
+#     model_provider="anthropic",
+#     api_key=Config.api_key_anthropic,
+# )
+# llm_cloud_openai = CloudLLM(
+#     model_name="gpt-4-1106-preview",
+#     model_provider="openai",
+#     api_key=Config.api_key_openai,
+# )
+#
+# llm_cloud_mistral = CloudLLM(
+#     model_name="mistral-medium",
+#     model_provider="mistral",
+#     api_key=Config.api_key_mistral,
+# )
+
+# If no parameters are provided, CloudLLM defaults to the OpenAI model.
+# To use a cloud model, uncomment the "CloudLLM" import (and the "Config"
+# import, if an API key is read from it) at the top of this file.
+
+# Example:
+# llm = CloudLLM() # same as llm_cloud_openai
+
+
+def get_system_prompt() -> str:
+    """
+    Returns the system prompt for the LLM.
+    This function provides the initial context and instructions for the LLM.
+    """
+    return llm.get_system_prompt()
+
+
+def generate_response(prompt: PromptValue) -> str:
+    response = llm.invoke(prompt)
+    return response.content
diff --git a/genai/service/qdrant_service.py b/genai/service/qdrant_service.py
@@ -0,0 +1,59 @@
+from rag.ingestion_pipeline import IngestionPipeline
+from vector_database.qdrant_vdb import QdrantVDB
+from logger import logger
+
+
+# Set vector database
+qdrant = QdrantVDB()
+
+
+def file_already_uploaded(collection_name: str, filename: str) -> bool:
+    """
+    Checks if a file has already been uploaded to the vector database.
+    This function is used to avoid duplicate uploads of the same file.
+    """
+    if (
+        qdrant.client.collection_exists(collection_name)
+        and qdrant.collection_contains_file(
+            qdrant.client,
+            collection_name,
+            filename
+        )
+    ):
+        return True
+    return False
+
+
+def get_vector_store(collection_name: str):
+    """
+    Returns the vector store for the specified collection name.
+    Delegates to QdrantVDB.create_and_get_vector_storage, which (judging by
+    its name) may create the collection if it is missing -- TODO confirm.
+    """
+    return qdrant.create_and_get_vector_storage(collection_name)
+
+
+def collection_already_exists(collection_name: str) -> bool:
+    """
+    Checks if a collection already exists in the vector database.
+    The generate route uses this to decide whether to retrieve documents.
+    """
+    if qdrant.client.collection_exists(collection_name):
+        logger.info(
+            "Collection already exists in qdrant: %s",
+            collection_name,
+        )
+        return True
+    return False
+
+
+def ingest_file(collection_name: str, file_path: str, filename: str):
+    """
+    Ingests a file into the vector database.
+    This function is used to process and store the file content in the vector
+    database for later retrieval.
+    """
+    pipeline = IngestionPipeline(
+        vector_store=get_vector_store(collection_name)
+        )
+    pipeline.ingest(file_path, filename)
diff --git a/genai/service/rag_service.py b/genai/service/rag_service.py
@@ -1,25 +1,27 @@
 from typing import List, Dict
 
-from langchain_qdrant import QdrantVectorStore
 from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 
+from service.qdrant_service import get_vector_store
+from service.llm_service import get_system_prompt
 
-def retrieve_similar_docs(vector_store: QdrantVectorStore, user_query: str):
+
+def retrieve_similar_docs(collection_name: str, user_query: str):
     """Retrieve similar documents based on the user query"""
+    vector_store = get_vector_store(collection_name)
     retriever = vector_store.as_retriever(search_kwargs={"k": 5})
     retrieved_docs = retriever.invoke(user_query)
     docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
     return docs_content
 
 
-def prepare_prompt(system_prompt: str,
-                   user_query: str,
+def prepare_prompt(user_query: str,
                    docs_content: str,
                    messages: List[BaseMessage]):
     """Prepare the prompt with prompt templates to give to LLM"""
     prompt_template = ChatPromptTemplate([
-        "system", system_prompt,
+        "system", get_system_prompt(),
         MessagesPlaceholder("msgs")
     ])