Skip to content

Commit 1c50f4c

Browse files
authored
feat(api): add prometheus and grafana (#3)
* docs: add contribution instructions * chore: add pre-commit linting and checking poetry * feat(api): add prometheus and grafana There is a decorator to translate API that emits metrics of /translate request count and response time that tags source and destination language
1 parent 14d83da commit 1c50f4c

File tree

12 files changed

+533
-6
lines changed

12 files changed

+533
-6
lines changed

.pre-commit-config.yaml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-added-large-files
      - id: check-merge-conflict

  - repo: local
    hooks:
      - id: poetry-check
        name: Check if Poetry lock file is up to date
        # Verify only: `poetry lock` would rewrite poetry.lock in the middle of
        # a commit instead of reporting that it is stale.
        # NOTE(review): `--lock` needs a reasonably recent Poetry; on older
        # releases the equivalent is `poetry lock --check` - confirm the
        # minimum Poetry version used by contributors.
        entry: bash -c 'poetry check --lock'
        language: system
        pass_filenames: false
        # Run when either side of the pyproject/lock pair changes.
        files: ^(pyproject\.toml|poetry\.lock)$

      - id: lint
        name: Run linting checks
        entry: bash -c 'make lint'
        language: system
        pass_filenames: false
        types: [python]

babeltron/app/main.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33
from importlib.metadata import version
44
from typing import AsyncIterator
55

6-
from fastapi import FastAPI
6+
from fastapi import FastAPI, Response
77
from fastapi.middleware.cors import CORSMiddleware
88
from fastapi_cache import FastAPICache
99
from fastapi_cache.backends.inmemory import InMemoryBackend
1010
from fastapi_cache.backends.redis import RedisBackend
1111
from redis import asyncio as aioredis
1212

13+
from babeltron.app.monitoring import PrometheusMiddleware, metrics_endpoint
1314
from babeltron.app.utils import include_routers
1415

1516
try:
@@ -20,7 +21,7 @@
2021

2122
@asynccontextmanager
2223
async def lifespan(_: FastAPI) -> AsyncIterator[None]:
23-
cache_url = os.environ.get("CACHE_URL")
24+
cache_url = os.environ.get("CACHE_URL", "")
2425

2526
if cache_url.startswith("in-memory"):
2627
FastAPICache.init(InMemoryBackend(), prefix="babeltron")
@@ -40,9 +41,9 @@ async def lifespan(_: FastAPI) -> AsyncIterator[None]:
4041
description="API for machine translation using NLLB models",
4142
version="0.1.0",
4243
contact={
43-
"name": "Your Name",
44-
"url": "https://your-website.com",
45-
"email": "your-email@example.com",
44+
"name": "Pedro Soares",
45+
"url": "https://github.com/hspedro",
46+
"email": "pedrofigueiredoc@gmail.com",
4647
},
4748
license_info={
4849
"name": "MIT",
@@ -66,6 +67,16 @@ async def lifespan(_: FastAPI) -> AsyncIterator[None]:
6667
# Include all routers
6768
include_routers(app)
6869

70+
# Add Prometheus middleware
71+
app.add_middleware(PrometheusMiddleware)
72+
73+
74+
# Add metrics endpoint
75+
@app.get("/metrics", include_in_schema=False)
async def metrics():
    """Serve the application's metrics in the Prometheus text exposition format.

    The route is excluded from the OpenAPI schema. The payload is whatever
    ``metrics_endpoint()`` returns.
    """
    # Imported locally so this handler carries its own dependency; the constant
    # is the versioned content type published by prometheus_client, which
    # scrapers use to pick the right parser (bare "text/plain" omits the version).
    from prometheus_client import CONTENT_TYPE_LATEST

    return Response(content=metrics_endpoint(), media_type=CONTENT_TYPE_LATEST)
78+
79+
6980
if __name__ == "__main__":
7081
import uvicorn
7182

babeltron/app/monitoring.py

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
import time
2+
3+
from fastapi import Request
4+
from prometheus_client import CollectorRegistry, Counter, Histogram, generate_latest
5+
from starlette.middleware.base import BaseHTTPMiddleware
6+
7+
# Create a registry
8+
registry = CollectorRegistry()
9+
10+
# Define metrics
11+
REQUEST_COUNT = Counter(
12+
"http_requests_total",
13+
"Total count of HTTP requests",
14+
["method", "endpoint", "status_code"],
15+
registry=registry,
16+
)
17+
18+
REQUEST_LATENCY = Histogram(
19+
"http_request_duration_seconds",
20+
"HTTP request latency in seconds",
21+
["method", "endpoint"],
22+
buckets=(
23+
0.01,
24+
0.025,
25+
0.05,
26+
0.075,
27+
0.1,
28+
0.25,
29+
0.5,
30+
0.75,
31+
1.0,
32+
2.5,
33+
5.0,
34+
7.5,
35+
10.0,
36+
25.0,
37+
50.0,
38+
),
39+
registry=registry,
40+
)
41+
42+
ERROR_COUNT = Counter(
43+
"http_request_errors_total",
44+
"Total count of HTTP request errors",
45+
["method", "endpoint", "exception_type"],
46+
registry=registry,
47+
)
48+
49+
TRANSLATION_COUNT = Counter(
50+
"translation_requests_total",
51+
"Total count of translation requests",
52+
["src_lang", "tgt_lang"],
53+
registry=registry,
54+
)
55+
56+
TRANSLATION_LATENCY = Histogram(
57+
"translation_duration_seconds",
58+
"Translation processing time in seconds",
59+
["src_lang", "tgt_lang"],
60+
buckets=(
61+
0.05,
62+
0.1,
63+
0.25,
64+
0.5,
65+
0.75,
66+
1.0,
67+
1.5,
68+
2.0,
69+
2.5,
70+
3.0,
71+
3.5,
72+
4,
73+
4.5,
74+
5.0,
75+
7.5,
76+
10.0,
77+
15.0,
78+
20.0,
79+
30.0,
80+
60.0,
81+
),
82+
registry=registry,
83+
)
84+
85+
CACHE_HIT_COUNT = Counter(
86+
"cache_hits_total", "Total count of cache hits", ["endpoint"], registry=registry
87+
)
88+
89+
CACHE_MISS_COUNT = Counter(
90+
"cache_misses_total", "Total count of cache misses", ["endpoint"], registry=registry
91+
)
92+
93+
MODEL_LOAD_TIME = Histogram(
94+
"model_load_time_seconds",
95+
"Time taken to load the model",
96+
["model_size"],
97+
buckets=(0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0),
98+
registry=registry,
99+
)
100+
101+
102+
class PrometheusMiddleware(BaseHTTPMiddleware):
    """HTTP middleware that records request metrics for every request.

    For each request that produces a response it increments ``REQUEST_COUNT``
    and observes the elapsed time on ``REQUEST_LATENCY``. Responses with a
    4xx/5xx status additionally increment ``ERROR_COUNT`` with
    ``exception_type="HTTP<status>"``. If the downstream application raises,
    ``ERROR_COUNT`` is incremented with the exception class name and the
    exception is re-raised unchanged.
    """

    def __init__(self, app=None):
        super().__init__(app)

    @staticmethod
    def _endpoint_label(request: Request) -> str:
        """Return the value used for the ``endpoint`` label.

        Prefers the matched route's path template (e.g. ``/items/{id}``) so
        that path parameters do not create one time series per distinct URL.
        Falls back to the raw URL path when no route object is available.
        """
        # NOTE(review): relies on the framework storing the matched route under
        # scope["route"] during routing - confirm for the FastAPI version in use.
        matched = request.scope.get("route")
        template = getattr(matched, "path", None)
        return template if template else request.url.path

    async def dispatch(self, request: Request, call_next):
        """Run the downstream app and record count, latency and error metrics.

        Args:
            request: The incoming request.
            call_next: Callable that forwards the request down the stack and
                returns the response.

        Returns:
            The response produced by ``call_next``, unmodified.

        Raises:
            Exception: Whatever ``call_next`` raises, after it is counted.
        """
        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by wall-clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        method = request.method

        # Keep the try body limited to the call that can actually fail.
        try:
            response = await call_next(request)
        except Exception as e:
            ERROR_COUNT.labels(
                method=method,
                endpoint=self._endpoint_label(request),
                exception_type=type(e).__name__,
            ).inc()
            raise

        elapsed = time.perf_counter() - start_time
        # Resolved after call_next so routing has had a chance to run.
        route = self._endpoint_label(request)

        REQUEST_COUNT.labels(
            method=method, endpoint=route, status_code=response.status_code
        ).inc()
        REQUEST_LATENCY.labels(method=method, endpoint=route).observe(elapsed)

        # Client and server error responses are also tracked as errors.
        if 400 <= response.status_code < 600:
            ERROR_COUNT.labels(
                method=method,
                endpoint=route,
                exception_type=f"HTTP{response.status_code}",
            ).inc()

        return response
140+
141+
142+
def track_dynamic_translation_metrics():
    """Build a decorator that records translation metrics for an async handler.

    The wrapped coroutine is expected to receive the translation request either
    as the ``request`` keyword argument or as its first positional argument;
    the ``src_lang`` and ``tgt_lang`` attributes of that object become the
    metric labels. Each call increments ``TRANSLATION_COUNT``; calls that
    return normally also observe their duration on ``TRANSLATION_LATENCY``.

    Returns:
        A decorator for ``async`` functions.
    """

    def decorator(func):
        from functools import wraps

        @wraps(func)
        async def wrapper(*args, **kwargs):
            request = kwargs.get("request")
            # Identity check: a falsy-but-present request object must not be
            # replaced by the first positional argument.
            if request is None and args:
                request = args[0]

            # Monotonic clock: immune to wall-clock adjustments.
            start_time = time.perf_counter()

            # Instrumentation must never break the endpoint: fall back to a
            # placeholder label when the request or its language fields are
            # missing instead of raising AttributeError.
            src_lang = getattr(request, "src_lang", None) or "unknown"
            tgt_lang = getattr(request, "tgt_lang", None) or "unknown"

            TRANSLATION_COUNT.labels(src_lang=src_lang, tgt_lang=tgt_lang).inc()

            result = await func(*args, **kwargs)

            # Only reached when func returned normally; failed calls are
            # counted above but contribute no latency sample.
            TRANSLATION_LATENCY.labels(src_lang=src_lang, tgt_lang=tgt_lang).observe(
                time.perf_counter() - start_time
            )

            return result

        return wrapper

    return decorator
169+
170+
171+
def metrics_endpoint():
    """Serialise the module-level registry with ``generate_latest`` and return the result."""
    payload = generate_latest(registry)
    return payload

babeltron/app/routers/translate.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pydantic import BaseModel, Field
77
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
88

9+
from babeltron.app.monitoring import track_dynamic_translation_metrics
910
from babeltron.app.utils import ORJsonCoder, cache_key_builder, get_model_path
1011

1112
router = APIRouter(tags=["Translation"])
@@ -77,6 +78,7 @@ class TranslationResponse(BaseModel):
7778
status_code=status.HTTP_200_OK,
7879
)
7980
@cache(expire=CACHE_TTL_SECONDS, key_builder=cache_key_builder, coder=ORJsonCoder)
81+
@track_dynamic_translation_metrics()
8082
async def translate(request: TranslationRequest):
8183
if model is None or tokenizer is None:
8284
raise HTTPException(

docker-compose.yml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,42 @@ services:
1212
environment:
1313
- MODEL_PATH=/models
1414
- CACHE_URL=in-memory
15+
- METRICS_ENABLED=true
1516
restart: unless-stopped
1617
healthcheck:
1718
test: ["CMD", "curl", "-f", "http://localhost:8000/healthz"]
1819
interval: 30s
1920
timeout: 10s
2021
retries: 3
2122
start_period: 40s
23+
24+
prometheus:
25+
image: prom/prometheus:latest
26+
container_name: prometheus
27+
ports:
28+
- "9090:9090"
29+
volumes:
30+
- ./prometheus.yml:/etc/prometheus/prometheus.yml
31+
command:
32+
- '--config.file=/etc/prometheus/prometheus.yml'
33+
- '--storage.tsdb.path=/prometheus'
34+
- '--web.console.libraries=/usr/share/prometheus/console_libraries'
35+
- '--web.console.templates=/usr/share/prometheus/consoles'
36+
depends_on:
37+
- api
38+
39+
grafana:
40+
image: grafana/grafana:latest
41+
container_name: grafana
42+
ports:
43+
- "3000:3000"
44+
environment:
45+
- GF_SECURITY_ADMIN_USER=admin
46+
- GF_SECURITY_ADMIN_PASSWORD=admin
47+
volumes:
48+
- grafana-storage:/var/lib/grafana
49+
depends_on:
50+
- prometheus
51+
52+
volumes:
53+
grafana-storage:

docs/CONTRIBUTING.md

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Contributing
2+
3+
We welcome contributions to Babeltron! This document provides guidelines for setting up your development environment and ensuring code quality.
4+
5+
## Development Setup
6+
7+
1. Clone the repository:
8+
```bash
9+
git clone https://github.com/hspedro/babeltron.git
10+
cd babeltron
11+
```
12+
13+
2. Install dependencies:
14+
```bash
15+
make install
16+
```
17+
18+
3. Install pre-commit hooks:
19+
```bash
20+
make pre-commit-install
21+
```
22+
23+
## Pre-commit Hooks
24+
25+
Babeltron uses pre-commit hooks to ensure code quality and consistency. These hooks run automatically when you commit changes and check for:
26+
27+
- Outdated Poetry lock files
28+
- Linting issues
29+
- Trailing whitespace and file formatting issues
30+
- YAML syntax errors
31+
- Merge conflicts
32+
33+
To run the pre-commit hooks manually on all files:
34+
35+
```bash
36+
pre-commit run --all-files
37+
```
38+
39+
## Code Style
40+
41+
Babeltron follows these code style guidelines:
42+
43+
- [Black](https://github.com/psf/black) for code formatting
44+
- [isort](https://pycqa.github.io/isort/) for import sorting
45+
46+
You can run these checks manually with:
47+
48+
```bash
49+
make lint
50+
```
51+
52+
## Testing
53+
54+
Before submitting a pull request, make sure all tests pass:
55+
56+
```bash
57+
make test
58+
```
59+
60+
To run tests with coverage reporting:
61+
62+
```bash
63+
make coverage
64+
```
65+
66+
## Pull Request Process
67+
68+
1. Fork the repository
69+
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
70+
3. Make your changes
71+
4. Run tests and linting checks
72+
5. Commit your changes (`git commit -m 'Add amazing feature'`)
73+
6. Push to the branch (`git push origin feature/amazing-feature`)
74+
7. Open a Pull Request
75+
76+
## Running locally
77+
78+
```bash
79+
make serve
80+
```
81+
82+
This is usually all you need for local development, since the server auto-reloads on code changes.

0 commit comments

Comments
 (0)