Commit 8ce1842

Merge pull request #43 from AET-DevOps25/feature/setup-llm-inference
Add LLM inference for response generation
2 parents (b62ccea + e381ef5), commit 8ce1842

7 files changed: 238 additions, 9 deletions

genai/app.py

Lines changed: 2 additions & 2 deletions
@@ -1,9 +1,9 @@
 from dotenv import load_dotenv
 from waitress import serve
 from flask import Flask
-from controller.generate_controller import generate_bp
+from genai.controller.generate_controller import generate_bp

-from config import Config
+from genai.config import Config

 app = Flask(__name__)
 app.register_blueprint(generate_bp)
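Note that these package-absolute imports resolve only when the app runs as part of the genai package (for example via "python -m genai.app" from the repository root, assuming genai/ is importable as a package); running app.py directly from inside genai/ would no longer find genai.config.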

genai/config.py

Lines changed: 5 additions & 1 deletion
@@ -9,11 +9,15 @@
     "Config",
     [
         "api_key_openai",
-        "waitress"
+        "waitress",
+        "api_openwebui",
+        "base_url"
     ],
 )

 Config = ConfigT(
     api_key_openai=environ.get("API_SECRET_OPENAI_MINE"),
     waitress=environ.get("USE_WAITRESS", "false").lower() == "true",
+    api_openwebui=environ.get("API_OPENWEBUI"),
+    base_url=environ.get("BASE_URL")
 )
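Since app.py calls load_dotenv, these values can come from an .env file. A sketch of the variables Config now reads; every value below is a placeholder, and the OpenWebUI URL is an assumption:

# .env (all values are placeholders)
API_SECRET_OPENAI_MINE=sk-xxxxxxxx
USE_WAITRESS=true
API_OPENWEBUI=owui-xxxxxxxx
BASE_URL=https://openwebui.example.com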

genai/controller/generate_controller.py

Lines changed: 71 additions & 6 deletions
@@ -3,12 +3,25 @@
 import logging
 from werkzeug.utils import secure_filename

-from rag.ingestion_pipeline import IngestionPipeline
-from vector_database.qdrant_vdb import QdrantVDB
+from genai.rag.ingestion_pipeline import IngestionPipeline
+from genai.vector_database.qdrant_vdb import QdrantVDB
+from genai.rag.llm.chat_model import ChatModel
+from genai.service.rag_service import (
+    retrieve_similar_docs,
+    prepare_prompt,
+    process_raw_messages
+)
+

 # Set Logging
 logging.getLogger().setLevel(logging.INFO)

+# Set ChatModel
+llm = ChatModel(model_name="llama3.3:latest")
+
+# Set Vector Database
+qdrant = QdrantVDB()
+
 generate_bp = Blueprint('generate', __name__)


@@ -31,8 +44,6 @@ def upload_file():

     try:
         collection_name = "recipes"
-        # Initialize vector database
-        qdrant = QdrantVDB()
         # Check if the file already in the collection
         if (qdrant.client.collection_exists(collection_name)
                 and qdrant.collection_contains_file(
@@ -69,6 +80,60 @@ def upload_file():
         os.remove(file_path)


-@generate_bp.route('/api/generate', methods=['POST'])
+@generate_bp.route('/genai/generate', methods=['POST'])
 def generate():
-    return jsonify({'output': 'Hello World!'})
+    """
+    API endpoint for generating recipe responses using retrieved context.
+
+    This endpoint processes a user query against a vector database of
+    recipes and returns an AI-generated response using both retrieved
+    context and the full conversation history provided in the request.
+
+    Request Body:
+        query (str): The user's recipe-related query
+        messages (List[Dict]): Full conversation history,
+            each with 'role' and 'content'
+        Example:
+            [
+                {"role": "USER", "content": "I have eggs and tomatoes."},
+                {"role": "ASSISTANT", "content": "You could make shakshuka."}
+            ]
+
+    Returns:
+        JSON response containing:
+            - 'response': The generated assistant reply
+    """
+    data = request.get_json()
+
+    if not data or "query" not in data or "messages" not in data:
+        return jsonify({"error": "Missing 'query' or 'messages'"}), 400
+
+    query = data["query"]
+    messages_raw = data["messages"]
+
+    try:
+        collection_name = "recipes"
+
+        if qdrant.client.collection_exists(collection_name):
+            # Get vector store
+            vector_store = qdrant.create_and_get_vector_storage(
+                collection_name
+            )
+            # Turn raw messages into BaseMessage objects
+            messages = process_raw_messages(messages_raw)
+            retrieved_docs = retrieve_similar_docs(vector_store, query)
+            prompt = prepare_prompt(
+                llm.get_system_prompt(),
+                query,
+                retrieved_docs,
+                messages
+            )
+
+            response = llm.invoke(prompt)
+
+            return jsonify({
+                "response": response.content,
+            }), 200
+
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
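For illustration, a client-side sketch of calling the new endpoint; the host and port are assumptions (the service may run behind waitress elsewhere), and only the JSON shape is taken from the docstring above:

import requests

payload = {
    "query": "Can I make it without eggs?",
    "messages": [
        {"role": "USER", "content": "I have eggs and tomatoes."},
        {"role": "ASSISTANT", "content": "You could make shakshuka."},
    ],
}
# http://localhost:5000 is an assumed address for the Flask app.
resp = requests.post("http://localhost:5000/genai/generate",
                     json=payload, timeout=120)
print(resp.json()["response"])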

genai/rag/llm/__init__.py

Whitespace-only changes.

genai/rag/llm/chat_model.py

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
+from typing import List
+from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.outputs import ChatResult, ChatGeneration
+from pydantic import Field
+
+from genai.service.openwebui_service import generate_response
+
+
+class ChatModel(BaseChatModel):
+    model_name: str = Field(default="llama3.3:latest")
+
+    def _generate(self, messages: List[BaseMessage],
+                  stop=None,
+                  **kwargs) -> ChatResult:
+        prompt = "\n".join([
+            f"User: {m.content}" if isinstance(m, HumanMessage)
+            else f"Assistant: {m.content}" if isinstance(m, AIMessage)
+            else ""
+            for m in messages
+        ])
+        response_text = generate_response(self.model_name, prompt)
+
+        return ChatResult(
+            generations=[
+                ChatGeneration(message=AIMessage(content=response_text))
+            ]
+        )
+
+    @property
+    def _llm_type(self) -> str:
+        return "recipai-custom-model"
+
+    def get_system_prompt(self) -> str:
+        """System prompt for the LLM"""
+        return """
+        You are an intelligent assistant that helps users discover
+        and generate recipes based on the ingredients they provide.
+
+        Use the contextual information provided below to tailor
+        your responses.
+
+        If relevant recipes or suggestions are found in the context,
+        prioritize those. If no relevant context is available,
+        use your own knowledge to help the user.
+
+        Context:
+        {context}
+
+        Be clear, creative, and helpful. If the user also asks
+        follow-up questions (e.g., dietary adjustments, name references,
+        meal timing), answer them precisely based on the
+        context and query.
+        """

genai/service/openwebui_service.py

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+import requests
+
+from genai.config import Config
+
+BASE_URL = Config.base_url
+
+
+def generate_response(model_name: str, prompt: str):
+    """Make a POST request to the chat completions endpoint to
+    generate a response from the LLM."""
+    url = f"{BASE_URL}/api/chat/completions"
+
+    headers = {
+        "Authorization": f"Bearer {Config.api_openwebui}",
+        "Content-Type": "application/json"
+    }
+
+    payload = {
+        "model": model_name,
+        "messages": [
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ]
+    }
+
+    try:
+        response = requests.post(
+            url,
+            json=payload,
+            headers=headers,
+            timeout=120
+        )
+        response.raise_for_status()
+        return response.json()["choices"][0]["message"]["content"]
+
+    except requests.exceptions.HTTPError as e:
+        raise RuntimeError(
+            f"HTTP error from LLM server: {e} ({response.status_code})"
+        ) from e
+    except requests.exceptions.Timeout as e:
+        raise RuntimeError(f"Request to LLM timed out: {e}") from e
+    except requests.exceptions.RequestException as e:
+        raise RuntimeError(f"Request to LLM failed: {e}") from e
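The chained indexing in generate_response() implies OpenWebUI returns the OpenAI-compatible chat completions schema, roughly this shape (values illustrative, extra fields omitted):

# Minimal body generate_response() expects back from /api/chat/completions.
example_body = {
    "choices": [
        {
            "message": {
                "role": "assistant",
                "content": "You could make shakshuka...",
            }
        }
    ]
}
assert example_body["choices"][0]["message"]["content"]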

genai/service/rag_service.py

Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
+from typing import List, Dict
+
+from langchain_qdrant import QdrantVectorStore
+from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+# from genai.rag.llm.chat_model import ChatModel
+
+
+def retrieve_similar_docs(vector_store: QdrantVectorStore, user_query: str):
+    """Retrieve similar documents based on the user query"""
+    retriever = vector_store.as_retriever(search_kwargs={"k": 5})
+    retrieved_docs = retriever.invoke(user_query)
+    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
+    return docs_content
+
+
+def prepare_prompt(system_prompt: str,
+                   user_query: str,
+                   docs_content: str,
+                   messages: List[BaseMessage]):
+    """Prepare the prompt with prompt templates to pass to the LLM"""
+    prompt_template = ChatPromptTemplate([
+        ("system", system_prompt),
+        MessagesPlaceholder("msgs")
+    ])
+
+    full_messages = messages + [HumanMessage(content=user_query)]
+
+    prompt = prompt_template.invoke({
+        "context": docs_content,
+        "msgs": full_messages
+    })
+
+    return prompt
+
+
+def process_raw_messages(raw_messages: List[Dict]) -> List[BaseMessage]:
+    """Turn raw messages into BaseMessage objects for the LLM"""
+    processed_messages = []
+    for msg in raw_messages:
+        role = msg.get("role")
+        content = msg.get("content")
+
+        if role.upper() == "USER":
+            processed_messages.append(HumanMessage(content=content))
+
+        elif role.upper() == "ASSISTANT":
+            processed_messages.append(AIMessage(content=content))
+
+    return processed_messages
+
+# For testing purposes
+# if __name__ == "__main__":
+#     msg = HumanMessage(content="My name is John Doe.")
+#     llm = ChatModel()
+#     prompt = prepare_prompt(llm.get_system_prompt(),
+#                             "Suggest me a basic breakfast.",
+#                             "",
+#                             [msg])
+#     response = llm.invoke(prompt)
+#     print(response.content)
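A small offline sketch of the two helpers above; no vector store or LLM is needed, and the stand-in system prompt only mimics the {context} placeholder the real one carries:

from genai.service.rag_service import prepare_prompt, process_raw_messages

raw = [
    {"role": "USER", "content": "I have eggs and tomatoes."},
    {"role": "ASSISTANT", "content": "You could make shakshuka."},
]
history = process_raw_messages(raw)  # -> [HumanMessage, AIMessage]

prompt = prepare_prompt(
    "You are a recipe assistant.\nContext:\n{context}",  # stand-in prompt
    "Can I make it vegetarian?",
    "Shakshuka: eggs poached in spiced tomato sauce.",   # fake retrieval text
    history,
)
# prompt is a ChatPromptValue: system message first, then the history
# followed by the new user query.
for message in prompt.to_messages():
    print(f"{message.type}: {message.content}")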
