Skip to content

Commit 3d72a4e

Browse files
authored
Merge pull request #35 from AET-DevOps25/feature/genai-llm-abstraction
feat(genai): Implement LLM abstraction layer
2 parents af5fed1 + 1b44eb3 commit 3d72a4e

File tree

5 files changed

+105
-1
lines changed

5 files changed

+105
-1
lines changed

genai/dummy_llm.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# genai/dummy_llm.py
from fastapi import FastAPI
import uvicorn

# Stand-in for a local LLM host (LM Studio / Ollama style), exposing the
# OpenAI-compatible chat-completions route so callers need no code changes.
app = FastAPI()


@app.post("/v1/chat/completions")
def dummy_completion():
    """Return a canned OpenAI-style chat-completion payload."""
    message = {
        "role": "assistant",
        "content": "This is a dummy summary from the local model.",
    }
    return {"choices": [{"message": message}]}


if __name__ == "__main__":
    print("Starting Dummy LLM Server on port 8001...")
    uvicorn.run(app, host="0.0.0.0", port=8001)

genai/src/main.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@
1313
from .services.embedding import embedder_service
1414
from .services.embedding.schemas import EmbedRequest, EmbedResponse, QueryRequest, QueryResponse, DocumentResult
1515
from .services.embedding.weaviate_service import get_weaviate_client, ensure_schema_exists, DOCUMENT_CLASS_NAME
16+
from .services.llm import llm_service
17+
from .services.llm.schemas import GenerateRequest, GenerateResponse
1618
from langchain_openai import OpenAIEmbeddings
1719

20+
1821
# --- Configuration ---
1922
logging.basicConfig(level=logging.INFO)
2023
load_dotenv()
@@ -101,4 +104,18 @@ def query_vector_db(request: QueryRequest):
101104

102105
docs_data = result["data"]["Get"][DOCUMENT_CLASS_NAME]
103106
docs = [DocumentResult(**doc) for doc in docs_data]
104-
return QueryResponse(query=request.query_text, results=docs)
107+
return QueryResponse(query=request.query_text, results=docs)
108+
109+
@app.post("/generate", response_model=GenerateResponse)
def generate_completion(request: GenerateRequest):
    """Generate a text completion using the configured LLM abstraction layer.

    Args:
        request: Body carrying the prompt to complete.

    Returns:
        GenerateResponse echoing the prompt, the generated text, and the
        provider name (mirrors the LLM_PROVIDER env var the factory reads).

    Raises:
        HTTPException: 500 when the underlying LLM call fails.
    """
    # Keep the try body minimal: only the call that can actually fail.
    try:
        generated_text = llm_service.generate_text(request.prompt)
    except Exception as e:
        # logging.exception records the full traceback, unlike logging.error
        # with an interpolated message; `from e` preserves the causal chain.
        logging.exception("ERROR during text generation")
        raise HTTPException(status_code=500, detail=f"Failed to generate text: {str(e)}") from e
    return GenerateResponse(
        prompt=request.prompt,
        generated_text=generated_text,
        provider=os.getenv("LLM_PROVIDER", "dummy"),
    )

genai/src/services/llm/__init__.py

Whitespace-only changes.
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# genai/src/services/llm/llm_service.py
2+
3+
import os
4+
import logging
5+
from langchain_openai import ChatOpenAI
6+
from langchain_community.llms import FakeListLLM
7+
from langchain_core.language_models.base import BaseLanguageModel
8+
9+
def llm_factory() -> BaseLanguageModel:
    """Create and return an LLM instance for the provider in LLM_PROVIDER.

    Supported providers:
        - "openai": ChatOpenAI; model and temperature are overridable via
          OPENAI_MODEL / OPENAI_TEMPERATURE, defaulting to the previous
          hard-coded values ("gpt-4o-mini", 0.7).
        - "dummy":  FakeListLLM cycling through canned responses, for tests.

    Raises:
        ValueError: if the provider is unsupported, or "openai" is selected
            without OPENAI_API_KEY set.
    """
    provider = os.getenv("LLM_PROVIDER", "dummy").lower()
    logging.info(f"--- Creating LLM for provider: {provider} ---")

    if provider == "openai":
        if not os.getenv("OPENAI_API_KEY"):
            raise ValueError("OPENAI_API_KEY is not set for the 'openai' provider.")
        # Defaults match the earlier hard-coded configuration; both settings
        # can now be tuned per deployment without a code change.
        model = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
        temperature = float(os.getenv("OPENAI_TEMPERATURE", "0.7"))
        return ChatOpenAI(model=model, temperature=temperature)

    if provider == "dummy":
        # Fake LLM for testing: cycles through these responses in order.
        responses = [
            "The first summary from the dummy LLM is about procedural languages.",
            "The second summary is about object-oriented programming.",
            "This is a fallback response.",
        ]
        return FakeListLLM(responses=responses)

    # Future providers (e.g. 'ollama' via ChatOllama) slot in above this line.
    raise ValueError(f"Unsupported LLM provider: {provider}")
38+
39+
def generate_text(prompt: str) -> str:
    """Return a text completion for *prompt* from the configured LLM.

    Handles both chat models (result carries a ``.content`` attribute) and
    plain LLMs such as FakeListLLM (result is already a string).
    """
    # Build the provider-appropriate model, then run the prompt through it;
    # .invoke() is the uniform entry point across LangChain model types.
    result = llm_factory().invoke(prompt)

    # Chat models wrap the text in a message object; plain LLMs hand the
    # string back directly.
    return result.content if hasattr(result, "content") else result

genai/src/services/llm/schemas.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# genai/src/services/llm/schemas.py
2+
from pydantic import BaseModel
3+
4+
class GenerateRequest(BaseModel):
    """Request body for the /generate endpoint."""
    # The prompt text forwarded to the configured LLM.
    prompt: str
6+
7+
class GenerateResponse(BaseModel):
    """Response body for the /generate endpoint."""
    # Echo of the submitted prompt.
    prompt: str
    # Completion text produced by the LLM.
    generated_text: str
    # Name of the provider that served the request (e.g. "openai", "dummy").
    provider: str

0 commit comments

Comments
 (0)