Skip to content

Commit dcea2e7

Browse files
authored
Merge pull request #102 from AET-DevOps25/feature/support-cloud-models
Add cloud llm model support
2 parents b3664e0 + b520feb commit dcea2e7

File tree

7 files changed

+134
-19
lines changed

7 files changed

+134
-19
lines changed

docker-compose.dev.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,10 @@ services:
7373
- mongodb
7474
restart: unless-stopped
7575

76-
prometheus:
76+
prometheus:
7777
image: prom/prometheus:v2.52.0
7878
container_name: prometheus
79-
ports:
79+
ports:
8080
- "9090:9090"
8181
volumes:
8282
- ./monitoring/prometheus:/etc/prometheus
@@ -128,7 +128,7 @@ services:
128128
- loki-data:/loki
129129
command: -config.file=/etc/loki/loki-config.yaml
130130
restart: unless-stopped
131-
131+
132132
cadvisor:
133133
image: gcr.io/cadvisor/cadvisor:latest
134134
container_name: cadvisor

genai/.env.template

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
# Cloud based LLM models
12
API_OPENAI="your openai key"
3+
API_ANTHROPIC="your anthropic key"
4+
API_MISTRAL="your mistral key"
5+
API_HUGGINGFACEHUB="your huggingface api token"
6+
# Local Models
27
API_OPENWEBUI="your openwebui key"
8+
# Base URL for calling local models
39
BASE_URL="https://gpu.aet.cit.tum.de"

genai/config.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,19 @@
99
"Config",
1010
[
1111
"api_key_openai",
12-
"api_openwebui",
12+
"api_key_anthropic",
13+
"api_key_mistral",
14+
"api_key_huggingface",
15+
"api_key_openwebui",
1316
"base_url"
1417
],
1518
)
1619

1720
Config = ConfigT(
1821
api_key_openai=environ.get("API_OPENAI"),
19-
api_openwebui=environ.get("API_OPENWEBUI"),
22+
api_key_anthropic=environ.get("API_ANTHROPIC", ""),
23+
api_key_mistral=environ.get("API_MISTRAL", ""),
24+
api_key_huggingface=environ.get("API_HUGGINGFACEHUB", ""),
25+
api_key_openwebui=environ.get("API_OPENWEBUI"),
2026
base_url=environ.get("BASE_URL")
2127
)

genai/rag/llm/cloud_chat_model.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import os
2+
3+
from langchain.chat_models import init_chat_model
4+
from langchain_core.prompt_values import PromptValue
5+
from langchain_core.messages import BaseMessage
6+
7+
from config import Config
8+
9+
10+
class CloudLLM:
    """A concrete implementation of a cloud-based LLM.

    Wraps LangChain's ``init_chat_model`` for one of the supported cloud
    providers: ``openai`` (default), ``anthropic``, ``mistral``, or
    ``huggingface``. The provider's API key is exported into the environment
    variable LangChain expects before the model is initialised.
    """

    # provider -> (env var LangChain reads, env var our .env template defines)
    _PROVIDER_ENV = {
        "openai": ("OPENAI_API_KEY", "API_OPENAI"),
        "anthropic": ("ANTHROPIC_API_KEY", "API_ANTHROPIC"),
        "mistral": ("MISTRAL_API_KEY", "API_MISTRAL"),
        "huggingface": ("HUGGINGFACEHUB_API_TOKEN", "API_HUGGINGFACEHUB"),
    }

    def __init__(
        self,
        model_name: str = "gpt-4-1106-preview",
        model_provider: str = "openai",
        api_key: str = ""
    ):
        """Initialise the chat model for the given provider.

        Args:
            model_name: Provider-specific model identifier.
            model_provider: One of ``openai``, ``anthropic``, ``mistral``,
                ``huggingface`` (case-insensitive).
            api_key: Explicit API key. If empty, the key is taken from the
                provider-specific environment variable (``API_OPENAI``,
                ``API_ANTHROPIC``, ``API_MISTRAL``, ``API_HUGGINGFACEHUB``).
                NOTE: the previous default was always the OpenAI key, which
                silently exported the wrong credential for non-OpenAI
                providers; the neutral default fixes that while keeping the
                openai default path behaviourally identical.

        Raises:
            ValueError: If ``model_provider`` is not supported.
        """
        provider = model_provider.lower()
        try:
            target_var, fallback_var = self._PROVIDER_ENV[provider]
        except KeyError:
            raise ValueError(
                f"Unsupported LLM provider: {provider}"
            ) from None

        # Prefer an explicitly supplied key; otherwise fall back to the
        # provider-specific variable loaded from .env.
        os.environ[target_var] = api_key or os.getenv(fallback_var, "")

        self.model = init_chat_model(
            model=model_name,
            model_provider=provider
        )

    def get_system_prompt(self) -> str:
        """Return the system prompt template; contains a ``{context}``
        placeholder to be filled with retrieved documents."""
        return """
        You are an intelligent assistant that helps users discover
        and generate recipes based on the ingredients they provide.

        Use the contextual information provided below to tailor
        your responses.

        If relevant recipes or suggestions are found in the context,
        prioritize those. If no relevant context is available,
        use your own knowledge to help the user.

        Context:
        {context}

        Be clear, creative, and helpful. If the user also asks
        follow-up questions (e.g., dietary adjustments, name references,
        meal timing), answer them precisely based on the
        context and query.
        """

    # Annotations are quoted so the class can be imported without the
    # langchain packages being resolvable at definition time; runtime
    # behaviour is unchanged.
    def invoke(self, prompt: "PromptValue") -> "BaseMessage":
        """Invoke the LLM with the given prompt and return its message."""
        return self.model.invoke(prompt)

genai/routes/routes.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
from fastapi import APIRouter, UploadFile, File, HTTPException, Request
22
from fastapi.responses import JSONResponse
33
import os
4+
5+
# from config import Config
46
from logger import logger
57
from time import perf_counter
68

79
from vector_database.qdrant_vdb import QdrantVDB
810
from rag.ingestion_pipeline import IngestionPipeline
911
from rag.llm.chat_model import ChatModel
12+
# from rag.llm.cloud_chat_model import CloudLLM
1013
from service.rag_service import (
1114
retrieve_similar_docs,
1215
prepare_prompt,
@@ -26,9 +29,43 @@
2629

2730
router = APIRouter()
2831

29-
llm = ChatModel(model_name="llama3.3:latest")
32+
# Set vector database
3033
qdrant = QdrantVDB()
3134

35+
# Set chat model for local llm models
36+
# Make calls to local models in openwebui hosted by the university
37+
llm = ChatModel(model_name="llama3.3:latest")
38+
39+
# Alternatively, we can switch to a chat model based on cloud models as well
40+
# If you want to use other cloud models, please adjust model_name,
41+
# model_provider, and api key
42+
# accordingly
43+
44+
# Examples:
45+
# llm_cloud_anthropic = CloudLLM(
46+
# model_name="claude-3-sonnet-20240229",
47+
# model_provider="anthropic",
48+
# api_key=Config.api_key_anthropic,
49+
# )
50+
# llm_cloud_openai = CloudLLM(
51+
# model_name="gpt-4-1106-preview",
52+
# model_provider="openai",
53+
# api_key=Config.api_key_openai,
54+
# )
55+
#
56+
# llm_cloud_mistral = CloudLLM(
57+
# model_name="mistral-medium",
58+
# model_provider="mistral",
59+
# api_key=Config.api_key_mistral,
60+
# )
61+
62+
# If no parameters are provided, the default cloud model will be openai.
63+
# If a cloud model is wanted, please remove the comment
64+
# for package import "CloudLLM"
65+
66+
# Example:
67+
# llm = CloudLLM() # same as llm_cloud_openai
68+
3269

3370
@router.post("/upload")
3471
async def upload_file(file: UploadFile = File(...)):
@@ -130,6 +167,7 @@ async def generate(request: Request):
130167

131168
response = llm.invoke(prompt)
132169
logger.info("Response is generated")
170+
133171
generation_successfully_counter.inc()
134172

135173
return JSONResponse(content={"response": response.content})

genai/service/openwebui_service.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def generate_response(model_name: str, prompt: str):
1212
url = f"{BASE_URL}/api/chat/completions"
1313

1414
headers = {
15-
"Authorization": f"Bearer {Config.api_openwebui}",
15+
"Authorization": f"Bearer {Config.api_key_openwebui}",
1616
"Content-Type": "application/json"
1717
}
1818

genai/service/rag_service.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from langchain_qdrant import QdrantVectorStore
44
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
55
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
6-
# from genai.rag.llm.chat_model import ChatModel
76

87

98
def retrieve_similar_docs(vector_store: QdrantVectorStore, user_query: str):
@@ -48,14 +47,3 @@ def process_raw_messages(raw_messages: List[Dict]) -> List[BaseMessage]:
4847
processed_messages.append(AIMessage(content=content))
4948

5049
return processed_messages
51-
52-
# For testing purposes
53-
# if __name__ == "__main__":
54-
# msg = HumanMessage(content="My name is John Doe.")
55-
# llm = ChatModel()
56-
# prompt = prepare_prompt(llm.get_system_prompt(),
57-
# "Suggest me a basic breakfast.",
58-
# "",
59-
# [msg])
60-
# response = llm.invoke(prompt)
61-
# print(response.content)

0 commit comments

Comments
 (0)