Skip to content

Commit 64ca20a

Browse files
committed
Add Prometheus Service and Enable GenAI Metrics
1 parent 8e54cf3 commit 64ca20a

File tree

7 files changed

+95
-5
lines changed

7 files changed

+95
-5
lines changed

docker-compose.dev-no-traefik.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ services:
4747
- "8081:8081" # FastAPI GenAI service
4848
networks:
4949
- dev-network
50+
env_file:
51+
- ./genai/.env
5052
environment:
5153
- WEAVIATE_HOST=weaviate
5254
- WEAVIATE_PORT=8083
@@ -97,6 +99,15 @@ services:
9799
ENABLE_API_BASED_MODULES: 'true'
98100
CLUSTER_HOSTNAME: 'node1'
99101

102+
prometheus:
103+
image: prom/prometheus
104+
volumes:
105+
- ./monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
106+
ports:
107+
- "9090:9090"
108+
networks:
109+
- dev-network
110+
100111
volumes:
101112
postgres_dev_data:
102113
weaviate_dev_data:

docker-compose.yml.j2

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,15 @@ services:
103103
ENABLE_API_BASED_MODULES: 'true'
104104
CLUSTER_HOSTNAME: 'node1'
105105

106+
prometheus:
107+
image: prom/prometheus
108+
volumes:
109+
- ./monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
110+
ports:
111+
- "9090:9090"
112+
networks:
113+
- proxy
114+
106115
volumes:
107116
pgdata:
108117
weaviate_data:

genAi/compose.yml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
services:
2+
genai:
3+
build:
4+
context: .
5+
dockerfile: Dockerfile
6+
container_name: studymate-genai-dev
7+
restart: unless-stopped
8+
ports:
9+
- "8081:8081" # FastAPI GenAI service
10+
environment:
11+
- WEAVIATE_HOST=weaviate
12+
- WEAVIATE_PORT=8083
13+
- PYTHONPATH=/app
14+
env_file:
15+
- .env
16+
depends_on:
17+
- weaviate
18+
volumes:
19+
- ./documents:/app/documents # Mount documents for development
20+
21+
weaviate:
22+
image: cr.weaviate.io/semitechnologies/weaviate:1.30.3
23+
container_name: studymate-weaviate-dev
24+
restart: unless-stopped
25+
command:
26+
- --host
27+
- 0.0.0.0
28+
- --port
29+
- '8083'
30+
- --scheme
31+
- http
32+
ports:
33+
- "8083:8083" # Weaviate vector database
34+
- "50051:50051" # gRPC port
35+
volumes:
36+
- weaviate_dev_data:/var/lib/weaviate
37+
environment:
38+
QUERY_DEFAULTS_LIMIT: 25
39+
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
40+
PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
41+
ENABLE_API_BASED_MODULES: 'true'
42+
CLUSTER_HOSTNAME: 'node1'
43+
44+
volumes:
45+
postgres_dev_data:
46+
weaviate_dev_data:

genAi/main.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import logging
22
from contextlib import asynccontextmanager
33
from fastapi import FastAPI
4+
from fastapi.responses import JSONResponse
45
from helpers import save_document
56
from request_models import CreateSessionRequest, PromptRequest, SummaryRequest, QuizRequest, FlashcardRequest
67
from llm import StudyLLM
8+
from prometheus_fastapi_instrumentator import Instrumentator
79

810
# Configure logging
911
logging.basicConfig(level=logging.INFO)
@@ -39,9 +41,16 @@ async def lifespan(_):
3941
lifespan=lifespan
4042
)
4143

44+
Instrumentator(
45+
excluded_handlers=['/metrics'],
46+
should_group_status_codes=False,
47+
).instrument(app).expose(app)
48+
49+
4250
# llm_instances["dummy"] = StudyLLM("./documents/example/W07_Microservices_and_Scalable_Architectures.pdf") # TODO: remove
4351
# llm_instances["dummy2"] = StudyLLM("./documents/example/dummy_knowledge.txt") # TODO: remove
4452

53+
# Auxiliary Endpoints
4554
@app.get("/health")
4655
async def health_check():
4756
"""Check the health of the service and its dependencies."""
@@ -51,6 +60,7 @@ async def health_check():
5160
return {"status": "unhealthy", "error": str(e)}
5261

5362

63+
# AI Tasks Endpoints
5464
@app.post("/session/load")
5565
async def load_session(data: CreateSessionRequest):
5666
"""
@@ -82,7 +92,7 @@ async def receive_prompt(data: PromptRequest):
8292
if data.session_id not in llm_instances:
8393
error_msg = f"Session {data.session_id} not found. Please ensure the document was processed successfully."
8494
logger.error(error_msg)
85-
return {"response": f"ERROR: {error_msg}"}
95+
return JSONResponse(status_code=404, content={"response": f"ERROR: {error_msg}"})
8696

8797
logger.info(f"Processing chat request for session {data.session_id}")
8898
response = llm_instances[data.session_id].prompt(data.message)

genAi/rag.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
44
from langchain_text_splitters import RecursiveCharacterTextSplitter
55
from langchain_weaviate.vectorstores import WeaviateVectorStore
6-
from langchain_cohere import CohereEmbeddings
76
from langchain_core.documents import Document
87
from dotenv import load_dotenv
98
import os
@@ -14,7 +13,6 @@
1413
# Setup shared embeddings model
1514
load_dotenv()
1615
embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
17-
# embeddings_model_cohere = CohereEmbeddings(model="embed-english-light-v3.0", cohere_api_key=os.getenv("COHERE_API_KEY"))
1816

1917
# Disable Huggingface's tokenizer parallelism (avoid deadlocks caused by process forking in langchain)
2018
os.environ["TOKENIZERS_PARALLELISM"] = "false"

genAi/requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ langchain-openai
1010
langchain-weaviate
1111
langchain-community
1212
langchain-text-splitters
13-
langchain-cohere
1413
pymupdf # for the PDF loader
1514
langchain_huggingface
1615
sentence-transformers
1716
requests
18-
langchain-core
17+
langchain-core
18+
prometheus-fastapi-instrumentator
monitoring/prometheus/prometheus.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
global:
2+
scrape_interval: 15s
3+
4+
scrape_configs:
5+
- job_name: 'prod'
6+
static_configs:
7+
- targets: ['genai:8081']
8+
labels:
9+
app: "genai-server"
10+
11+
# For dev only (when services are run manually)
12+
- job_name: 'dev'
13+
static_configs:
14+
- targets: ['host.docker.internal:8000'] # genai
15+
labels:
16+
app: "genai-server"

0 commit comments

Comments (0)