Skip to content

Commit 82dd7fa

Browse files
authored
refactor(tracing): add jaeger thrift exporter (#9)
* refactor(tracing): add jaeger thrift exporter * chore(ci): dynamic badge
1 parent 962f869 commit 82dd7fa

File tree

9 files changed

+234
-106
lines changed

9 files changed

+234
-106
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
[![Tests](https://github.com/hspedro/babeltron/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/hspedro/babeltron/actions/workflows/test.yml)
44
[![Python Version](https://img.shields.io/badge/python-3.9%2B-blue)](https://www.python.org/downloads/)
55
[![License](https://img.shields.io/github/license/hspedro/babeltron)](https://github.com/hspedro/babeltron/blob/main/LICENSE)
6-
[![Version](https://img.shields.io/badge/version-0.3.1-green)](https://github.com/hspedro/babeltron/releases)
6+
[![Version](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/hspedro/babeltron/main/.github/badges/version.json)](https://github.com/hspedro/babeltron/releases)
77
[![PyPI version](https://badge.fury.io/py/babeltron.svg)](https://badge.fury.io/py/babeltron)
88
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
99
[![Coverage](https://img.shields.io/badge/coverage-90%25-brightgreen.svg)](https://github.com/hspedro/babeltron/actions/workflows/test.yml)

babeltron/app/main.py

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,30 @@
1+
import importlib.metadata
2+
import logging
13
import os
2-
from importlib.metadata import version
34

4-
from fastapi import FastAPI, Response
5+
from fastapi import FastAPI, HTTPException, status
56
from fastapi.middleware.cors import CORSMiddleware
7+
from starlette.responses import JSONResponse, PlainTextResponse
68

79
from babeltron.app.middlewares.auth import BasicAuthMiddleware
10+
from babeltron.app.models.m2m import M2MTranslationModel
811
from babeltron.app.monitoring import PrometheusMiddleware, metrics_endpoint
912
from babeltron.app.tracing import setup_jaeger
1013
from babeltron.app.utils import include_routers
1114

15+
# Get version from package metadata
1216
# Resolve the application version once, at import time.
#
# Order of preference:
#   1. Installed package metadata for "babeltron".
#   2. The [project].version entry of a pyproject.toml in the current working
#      directory (covers running from a source checkout without installing).
#   3. The literal "dev".
try:
    __version__ = importlib.metadata.version("babeltron")
except importlib.metadata.PackageNotFoundError:
    try:
        try:
            # Standard library on Python 3.11+.
            import tomllib as _toml
        except ImportError:
            # Backport with the same API for older interpreters.
            import tomli as _toml

        with open("pyproject.toml", "rb") as f:
            __version__ = str(_toml.load(f)["project"]["version"])
    except (ImportError, OSError, KeyError, TypeError, ValueError):
        # ImportError: no TOML parser available.
        # OSError: file missing or unreadable (includes FileNotFoundError).
        # KeyError / TypeError: file lacks a [project] table or version key.
        # ValueError: malformed TOML (TOMLDecodeError subclasses ValueError).
        # None of these should prevent the application from starting.
        __version__ = "dev"
1628

1729

1830
def create_app() -> FastAPI:
@@ -68,6 +80,8 @@ def create_app() -> FastAPI:
6880
"/healthz",
6981
"/readyz",
7082
"/metrics",
83+
"/version", # Add version endpoint to excluded paths
84+
"/version-badge", # Add version badge endpoint to excluded paths
7185
],
7286
)
7387

@@ -80,17 +94,50 @@ def create_app() -> FastAPI:
8094
# Add Prometheus middleware
8195
app.add_middleware(PrometheusMiddleware)
8296

97+
# Add version endpoint for badge
98+
@app.get("/version", response_class=PlainTextResponse, include_in_schema=False)
99+
async def get_version():
100+
return __version__
101+
102+
# Add version badge endpoint for Shields.io
103+
@app.get("/version-badge", response_class=JSONResponse, include_in_schema=False)
104+
async def get_version_badge():
105+
return {
106+
"schemaVersion": 1,
107+
"label": "version",
108+
"message": __version__,
109+
"color": "green",
110+
"cacheSeconds": 3600, # Cache for 1 hour
111+
}
112+
113+
@app.get("/healthz", include_in_schema=False)
114+
async def health():
115+
return {"status": "ok"}
116+
117+
@app.get("/readyz", include_in_schema=False)
118+
async def ready():
119+
# Check if model is loaded
120+
try:
121+
# Just initialize the model to check if it loads correctly
122+
M2MTranslationModel()
123+
return {"status": "ready"}
124+
except Exception as e:
125+
logging.error(f"Readiness check failed: {e}")
126+
raise HTTPException(
127+
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
128+
detail="Service not ready",
129+
)
130+
131+
@app.get("/metrics", include_in_schema=False)
132+
async def metrics():
133+
return PlainTextResponse(content=metrics_endpoint())
134+
83135
return app
84136

85137

86138
app = create_app()
87139

88140

89-
@app.get("/metrics", include_in_schema=False)
90-
async def metrics():
91-
return Response(content=metrics_endpoint(), media_type="text/plain")
92-
93-
94141
if __name__ == "__main__":
95142
import uvicorn
96143

babeltron/app/routers/translate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class TranslationRequest(BaseModel):
2929
)
3030

3131
class Config:
32-
schema_extra = {
32+
json_schema_extra = {
3333
"example": {
3434
"text": "Hello, how are you?",
3535
"src_lang": "en",

babeltron/app/tracing.py

Lines changed: 70 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
if not IN_TEST:
99
from opentelemetry import trace
10+
from opentelemetry.exporter.jaeger.thrift import JaegerExporter
1011
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
1112
OTLPSpanExporter as OTLPSpanExporterGRPC,
1213
)
@@ -15,10 +16,12 @@
1516
)
1617
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
1718
from opentelemetry.instrumentation.logging import LoggingInstrumentor
19+
from opentelemetry.sdk.resources import Resource
1820
from opentelemetry.sdk.trace import TracerProvider
19-
from opentelemetry.sdk.trace.export import BatchSpanProcessor
21+
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
22+
from opentelemetry.semconv.resource import ResourceAttributes
2023
else:
21-
# Create dummy classes for test environment
24+
2225
class DummyTracerProvider:
2326
def add_span_processor(self, processor):
2427
pass
@@ -43,53 +46,85 @@ def instrument_app(app, **kwargs):
4346
FastAPIInstrumentor = DummyInstrumentor
4447
LoggingInstrumentor = DummyInstrumentor
4548

46-
OTLP_MODE = os.environ.get("OTLP_MODE", "otlp-grpc")
47-
OTLP_GRPC_ENDPOINT = os.environ.get("OTLP_GRPC_ENDPOINT", "otel-collector:4317")
48-
OTLP_HTTP_ENDPOINT = os.environ.get(
49-
"OTLP_HTTP_ENDPOINT", "http://otel-collector:4318/v1/traces"
49+
# Configuration options
50+
OTLP_MODE = os.environ.get("OTLP_MODE", "otlp-grpc") # Default to otlp-grpc
51+
OTLP_COLLECTOR_HOST = os.environ.get("OTLP_COLLECTOR_HOST", "otel-collector")
52+
OTLP_COLLECTOR_PORT = os.environ.get("OTLP_COLLECTOR_PORT", "4317")
53+
JAEGER_AGENT_HOST = os.environ.get(
54+
"JAEGER_AGENT_HOST", "localhost" if IN_TEST else "jaeger"
5055
)
56+
JAEGER_AGENT_PORT = int(os.environ.get("JAEGER_AGENT_PORT", "6831"))
57+
SERVICE_NAME = os.environ.get("OTEL_SERVICE_NAME", "babeltron")
5158

5259

5360
def setup_jaeger(app: FastAPI, log_correlation: bool = True) -> None:
    """Configure OpenTelemetry tracing for ``app``.

    The exporter is selected by the module-level ``OTLP_MODE`` setting
    (compared case-insensitively):

    * ``"disabled"``  - tracing is not set up at all.
    * ``"otlp-grpc"`` - OTLP over gRPC to
      ``OTLP_COLLECTOR_HOST:OTLP_COLLECTOR_PORT``.
    * ``"otlp-http"`` - OTLP over HTTP to
      ``http://OTLP_COLLECTOR_HOST:<port>/v1/traces``.
    * anything else   - Jaeger Thrift to
      ``JAEGER_AGENT_HOST:JAEGER_AGENT_PORT``.

    When ``IN_TEST`` is set, a provider with no remote exporter is installed;
    spans are printed to the console only if ``OTEL_TEST_EXPORT=true``.

    Args:
        app: The FastAPI application to instrument.
        log_correlation: When true (and not in test mode), instrument the
            ``logging`` module so log records carry trace context.
    """
    excluded_urls = "/metrics,/healthz,/readyz,/docs,/redoc,/openapi.json"

    if IN_TEST:
        logging.info("Using minimal OpenTelemetry setup for test environment")
        # NOTE(review): the opentelemetry imports visible in this module are
        # guarded by `if not IN_TEST`. Confirm that test-mode stand-ins exist
        # for Resource, ResourceAttributes, TracerProvider, trace,
        # BatchSpanProcessor and ConsoleSpanExporter; otherwise this branch
        # raises NameError.
        resource = Resource.create(
            {ResourceAttributes.SERVICE_NAME: f"{SERVICE_NAME}-test"}
        )
        tracer = TracerProvider(resource=resource)
        trace.set_tracer_provider(tracer)

        # Console export is opt-in so test output stays quiet by default.
        if os.environ.get("OTEL_TEST_EXPORT", "false").lower() == "true":
            tracer.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
            logging.info("Console span exporter enabled for tests")

        FastAPIInstrumentor.instrument_app(
            app,
            tracer_provider=tracer,
            excluded_urls=excluded_urls,
        )
        return

    # Normalise once so every mode comparison is case-insensitive, not just
    # the "disabled" check.
    mode = OTLP_MODE.lower()

    if mode == "disabled":
        logging.info("OpenTelemetry tracing is disabled")
        return

    # Attach the service name so traces are attributed to this service.
    resource = Resource.create({ResourceAttributes.SERVICE_NAME: SERVICE_NAME})
    tracer = TracerProvider(resource=resource)
    trace.set_tracer_provider(tracer)

    if mode == "otlp-grpc":
        endpoint = f"{OTLP_COLLECTOR_HOST}:{OTLP_COLLECTOR_PORT}"
        tracer.add_span_processor(
            BatchSpanProcessor(OTLPSpanExporterGRPC(endpoint=endpoint, insecure=True))
        )
        logging.info(f"OTLP gRPC exporter enabled with endpoint: {endpoint}")
    elif mode == "otlp-http":
        # The HTTP exporter needs a full URL (scheme plus the /v1/traces
        # path) when the endpoint is passed explicitly. The shared port
        # default (4317) is the gRPC receiver, so fall back to the HTTP
        # receiver port unless the port was set explicitly.
        port = OTLP_COLLECTOR_PORT if "OTLP_COLLECTOR_PORT" in os.environ else "4318"
        if "://" in OTLP_COLLECTOR_HOST:
            base = OTLP_COLLECTOR_HOST
        else:
            base = f"http://{OTLP_COLLECTOR_HOST}"
        endpoint = f"{base}:{port}/v1/traces"
        tracer.add_span_processor(
            BatchSpanProcessor(OTLPSpanExporterHTTP(endpoint=endpoint))
        )
        logging.info(f"OTLP HTTP exporter enabled with endpoint: {endpoint}")
    else:
        # Any other mode falls back to the Jaeger Thrift agent exporter.
        jaeger_exporter = JaegerExporter(
            agent_host_name=JAEGER_AGENT_HOST,
            agent_port=JAEGER_AGENT_PORT,
        )
        tracer.add_span_processor(BatchSpanProcessor(jaeger_exporter))
        logging.info(
            f"Jaeger Thrift exporter enabled with agent: {JAEGER_AGENT_HOST}:{JAEGER_AGENT_PORT}"
        )
        logging.warning(
            "Note: The Jaeger Thrift exporter is deprecated. Consider migrating to OTLP."
        )

    if log_correlation:
        LoggingInstrumentor().instrument(set_logging_format=True)

    FastAPIInstrumentor.instrument_app(
        app,
        tracer_provider=tracer,
        excluded_urls=excluded_urls,
    )

docker-compose.yml

Lines changed: 32 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ services:
1414
- METRICS_ENABLED=true
1515
- OTLP_MODE=otlp-grpc
1616
- OTEL_SERVICE_NAME=babeltron
17-
- OTLP_GRPC_ENDPOINT=otel-collector:4317
17+
- OTLP_COLLECTOR_HOST=otel-collector
18+
- OTLP_COLLECTOR_PORT=4317
1819
- API_PASSWORD=translation2025
1920
- WORKER_COUNT=2
2021
restart: unless-stopped
@@ -24,6 +25,8 @@ services:
2425
timeout: 10s
2526
retries: 3
2627
start_period: 40s
28+
depends_on:
29+
- otel-collector
2730

2831
prometheus:
2932
image: prom/prometheus:latest
@@ -53,58 +56,41 @@ services:
5356
depends_on:
5457
- prometheus
5558

56-
jaeger-collector:
57-
container_name: jaeger-collector
58-
image: jaegertracing/jaeger-collector:1.57.0
59-
command:
60-
- "--cassandra.keyspace=jaeger_v1_dc1"
61-
- "--cassandra.servers=cassandra"
62-
- "--collector.otlp.enabled=true"
63-
environment:
64-
- SAMPLING_CONFIG_TYPE=adaptive
65-
ports:
66-
- "4317" # accept OpenTelemetry Protocol (OTLP) over gRPC
67-
- "4318" # accept OpenTelemetry Protocol (OTLP) over HTTP
68-
restart: on-failure
69-
depends_on:
70-
- cassandra-schema
71-
72-
cassandra:
73-
container_name: cassandra
74-
image: cassandra:4.1.4
75-
76-
cassandra-schema:
77-
container_name: cassandra-schema
78-
image: jaegertracing/jaeger-cassandra-schema:1.57.0
79-
depends_on:
80-
- cassandra
81-
82-
jaeger-query:
83-
container_name: jaeger-query
84-
image: jaegertracing/jaeger-query:1.57.0
85-
command:
86-
- "--cassandra.keyspace=jaeger_v1_dc1"
87-
- "--cassandra.servers=cassandra"
59+
jaeger:
60+
image: jaegertracing/all-in-one:latest
61+
container_name: jaeger
8862
ports:
89-
- "16686:16686"
90-
- "16687:16687"
91-
restart: on-failure
92-
depends_on:
93-
- cassandra-schema
63+
- "6831:6831/udp" # Jaeger thrift compact (agent)
64+
- "6832:6832/udp" # Jaeger thrift binary (agent)
65+
- "5778:5778" # Agent configs
66+
- "16686:16686" # Jaeger UI
67+
- "14250:14250" # Model/collector gRPC
68+
- "14268:14268" # Jaeger HTTP (collector)
69+
- "14269:14269" # Admin port
70+
environment:
71+
- COLLECTOR_OTLP_ENABLED=true
72+
- COLLECTOR_ZIPKIN_HOST_PORT=:9411
73+
- SAMPLING_STRATEGIES_FILE=/etc/jaeger/sampling.json
74+
- LOG_LEVEL=debug
75+
volumes:
76+
- ./etc/jaeger-sampling.json:/etc/jaeger/sampling.json
77+
restart: unless-stopped
9478

9579
otel-collector:
96-
image: otel/opentelemetry-collector-contrib:0.100.0
80+
image: otel/opentelemetry-collector-contrib:latest
9781
container_name: otel-collector
98-
command:
99-
- "--config=/conf/config.yaml"
82+
command: ["--config=/etc/otel-collector-config.yaml"]
10083
volumes:
101-
- ./etc/otel-collector-config.yaml:/conf/config.yaml
84+
- ./etc/otel-collector-config.yaml:/etc/otel-collector-config.yaml
10285
ports:
103-
- "4317" # OTLP gRPC receiver
104-
- "4318" # OTLP http receiver
105-
restart: on-failure
86+
- "4317:4317" # OTLP gRPC receiver
87+
- "4318:4318" # OTLP HTTP receiver
88+
- "8888:8888" # Prometheus metrics exposed by the collector
89+
- "8889:8889" # Prometheus exporter metrics
90+
- "13133:13133" # Health check extension
91+
restart: unless-stopped
10692
depends_on:
107-
- jaeger-collector
93+
- jaeger
10894

10995
volumes:
11096
grafana-storage:

0 commit comments

Comments
 (0)