-
Notifications
You must be signed in to change notification settings - Fork 0
Fix configuration caching and telemetry exporter flags #150
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,133 @@ | ||
| """Utilities to configure OpenTelemetry exporters for the API service.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
| import os | ||
| from typing import Any, Final | ||
|
|
||
| _LOGGER = logging.getLogger(__name__) | ||
|
|
||
| _TRUE_VALUES: Final[set[str]] = {"1", "true", "t", "yes", "y", "on"} | ||
| _BOOTSTRAPPED: bool = False | ||
|
|
||
|
|
||
| def _env_flag(name: str, *, default: bool = False) -> bool: | ||
| """Parse a boolean environment variable using common truthy values.""" | ||
|
|
||
| raw_value = os.getenv(name) | ||
| if raw_value is None: | ||
| return default | ||
|
|
||
| value = raw_value.strip().lower() | ||
| if not value: | ||
| return default | ||
| return value in _TRUE_VALUES | ||
|
|
||
|
|
||
| def _parse_headers(raw_headers: str) -> dict[str, str]: | ||
| """Convert OTLP header strings into a mapping.""" | ||
|
|
||
| parsed: dict[str, str] = {} | ||
| for item in raw_headers.split(","): | ||
| if "=" not in item: | ||
| continue | ||
| key, value = item.split("=", 1) | ||
| key = key.strip() | ||
| if not key: | ||
| continue | ||
| parsed[key] = value.strip() | ||
| return parsed | ||
|
|
||
|
|
||
| def _span_exporter_kwargs() -> dict[str, Any]: | ||
| """Build keyword arguments for the OTLP span exporter.""" | ||
|
|
||
| kwargs: dict[str, Any] = { | ||
| "insecure": _env_flag("OTEL_EXPORTER_OTLP_INSECURE", default=False) | ||
| } | ||
|
|
||
| endpoint = os.getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT") or os.getenv( | ||
| "OTEL_EXPORTER_OTLP_ENDPOINT" | ||
| ) | ||
| if endpoint: | ||
| kwargs["endpoint"] = endpoint | ||
|
|
||
| headers = os.getenv("OTEL_EXPORTER_OTLP_TRACES_HEADERS") or os.getenv( | ||
| "OTEL_EXPORTER_OTLP_HEADERS" | ||
| ) | ||
| if headers: | ||
| kwargs["headers"] = _parse_headers(headers) | ||
|
|
||
| return kwargs | ||
|
|
||
|
|
||
| def _metric_exporter_kwargs() -> dict[str, Any]: | ||
| """Build keyword arguments for the OTLP metric exporter.""" | ||
|
|
||
| kwargs: dict[str, Any] = { | ||
| "insecure": _env_flag("OTEL_EXPORTER_OTLP_METRICS_INSECURE", default=False) | ||
| } | ||
|
|
||
| endpoint = os.getenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT") or os.getenv( | ||
| "OTEL_EXPORTER_OTLP_ENDPOINT" | ||
| ) | ||
| if endpoint: | ||
| kwargs["endpoint"] = endpoint | ||
|
|
||
| headers = os.getenv("OTEL_EXPORTER_OTLP_METRICS_HEADERS") or os.getenv( | ||
| "OTEL_EXPORTER_OTLP_HEADERS" | ||
| ) | ||
| if headers: | ||
| kwargs["headers"] = _parse_headers(headers) | ||
|
|
||
| return kwargs | ||
|
|
||
|
|
||
| def bootstrap() -> None: | ||
| """Initialise OpenTelemetry tracing and metrics exporters.""" | ||
|
|
||
| global _BOOTSTRAPPED | ||
|
|
||
| if _BOOTSTRAPPED: | ||
| return | ||
|
|
||
| try: | ||
| from opentelemetry import metrics as otel_metrics | ||
| from opentelemetry import trace as otel_trace | ||
| from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( | ||
| OTLPMetricExporter, | ||
| ) | ||
| from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( | ||
| OTLPSpanExporter, | ||
| ) | ||
| from opentelemetry.sdk.metrics import MeterProvider | ||
| from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader | ||
| from opentelemetry.sdk.resources import Resource | ||
| from opentelemetry.sdk.trace import TracerProvider | ||
| from opentelemetry.sdk.trace.export import BatchSpanProcessor | ||
| except ImportError as exc: # pragma: no cover - optional dependency guard | ||
| _LOGGER.debug("OpenTelemetry packages are unavailable: %s", exc) | ||
| return | ||
|
|
||
| resource = Resource.create( | ||
| { | ||
| "service.name": os.getenv("OTEL_SERVICE_NAME", "paform-backend"), | ||
| "service.namespace": os.getenv("OTEL_SERVICE_NAMESPACE", "paform"), | ||
| } | ||
| ) | ||
|
|
||
| tracer_provider = TracerProvider(resource=resource) | ||
| span_exporter = OTLPSpanExporter(**_span_exporter_kwargs()) | ||
| tracer_provider.add_span_processor(BatchSpanProcessor(span_exporter)) | ||
| otel_trace.set_tracer_provider(tracer_provider) | ||
|
|
||
| metric_exporter = OTLPMetricExporter(**_metric_exporter_kwargs()) | ||
| meter_provider = MeterProvider( | ||
| resource=resource, | ||
| metric_readers=[PeriodicExportingMetricReader(metric_exporter)], | ||
| ) | ||
| otel_metrics.set_meter_provider(meter_provider) | ||
|
|
||
| _BOOTSTRAPPED = True | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -19,14 +19,26 @@ | |||||||||||
| from api.routes_sync import router as sync_router | ||||||||||||
| from api.routes_observability import router as observability_router | ||||||||||||
| from prometheus_client import CONTENT_TYPE_LATEST, generate_latest | ||||||||||||
| from prometheus_fastapi_instrumentator import Instrumentator | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chainGuard optional dependency and prevent metric duplication/conflicts
Recommended changes: -from prometheus_fastapi_instrumentator import Instrumentator
+try: # pragma: no cover - optional dependency guard
+ from prometheus_fastapi_instrumentator import Instrumentator
+except ImportError: # pragma: no cover
+ Instrumentator = None # type: ignore[assignment]-# Register Prometheus instrumentation so HTTP metrics are emitted for alerts/dashboards
-Instrumentator().instrument(app)
+# Register Prometheus instrumentation only if available; exclude /metrics
+if Instrumentator is not None:
+ Instrumentator().instrument(app, excluded_handlers=["/metrics"])Also decide on a single HTTP request metric source (either Instrumentator or your custom middleware + metrics.py) to avoid duplicate series. Run a quick web check to confirm default metric names from Instrumentator: Also applies to: 53-53 🌐 Web query: 💡 Result: Short answer: Yes — prometheus_fastapi_instrumentator registers a Counter named http_requests_total by default, and that can conflict with an existing metric of the same name (you'll get duplicate/registration errors). Sources:
If you want, I can show ways to avoid the conflict (use a custom registry/namespace/subsystem or disable the default metric). Length of output: 1984 Length of output: 4730 Length of output: 99 Guard optional dependency and fix metric duplication conflict Verification confirms both critical issues:
Required changes: Guard the import: -from prometheus_fastapi_instrumentator import Instrumentator
+try: # pragma: no cover - optional dependency guard
+ from prometheus_fastapi_instrumentator import Instrumentator
+except ImportError: # pragma: no cover
+ Instrumentator = None # type: ignore[assignment]Conditionally register and avoid metric collision: -Instrumentator().instrument(app)
+if Instrumentator is not None:
+ Instrumentator().instrument(app, excluded_handlers=["/metrics"])Additionally, remove 📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||
| from api.metrics import observe_http_request | ||||||||||||
|
|
||||||||||||
| try: # pragma: no cover - instrumentation is optional in some environments | ||||||||||||
| from api.instrumentation_boot import bootstrap as bootstrap_instrumentation | ||||||||||||
| except ImportError: # pragma: no cover - optional dependency guard | ||||||||||||
| bootstrap_instrumentation = None # type: ignore[assignment] | ||||||||||||
|
|
||||||||||||
| # Configure logging | ||||||||||||
| logging.basicConfig( | ||||||||||||
| level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" | ||||||||||||
| ) | ||||||||||||
| logger = logging.getLogger(__name__) | ||||||||||||
|
|
||||||||||||
| if bootstrap_instrumentation is not None: # pragma: no branch - simple guard | ||||||||||||
| try: | ||||||||||||
| bootstrap_instrumentation() | ||||||||||||
| except Exception: # pragma: no cover - defensive guard | ||||||||||||
| logger.exception("Failed to bootstrap OpenTelemetry instrumentation") | ||||||||||||
|
|
||||||||||||
|
Comment on lines
+31
to
+36
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick | 🔵 Trivial Bootstrap timing is fine; avoid re-instantiating Settings later Bootstrapping at import-time with a try/except is acceptable. Note: below, code constructs Settings again via _Settings(); prefer the cached settings variable to avoid double parsing and to keep behavior consistent with get_settings(). Would you like a follow-up diff to switch to the existing settings instance for the SQLite check? 🤖 Prompt for AI Agents |
||||||||||||
| # Initialize settings | ||||||||||||
| settings = get_settings() | ||||||||||||
|
|
||||||||||||
|
|
@@ -74,13 +86,15 @@ async def dispatch(self, request: Request, call_next): # type: ignore[override] | |||||||||||
| start = perf_counter() | ||||||||||||
| try: | ||||||||||||
| response = await call_next(request) | ||||||||||||
| if response is None: # pragma: no cover - FastAPI guarantees a Response | ||||||||||||
| raise RuntimeError("ASGI call_next returned None response") | ||||||||||||
| except Exception: | ||||||||||||
|
||||||||||||
| duration = perf_counter() - start | ||||||||||||
| observe_http_request( | ||||||||||||
| service="backend", | ||||||||||||
| route=_resolve_route_label(request), | ||||||||||||
| method=request.method, | ||||||||||||
| status="500", | ||||||||||||
| status_code=500, | ||||||||||||
| duration_seconds=duration, | ||||||||||||
| ) | ||||||||||||
| raise | ||||||||||||
|
|
@@ -90,7 +104,7 @@ async def dispatch(self, request: Request, call_next): # type: ignore[override] | |||||||||||
| service="backend", | ||||||||||||
| route=_resolve_route_label(request), | ||||||||||||
| method=request.method, | ||||||||||||
| status=str(response.status_code), | ||||||||||||
| status_code=response.status_code, | ||||||||||||
| duration_seconds=duration, | ||||||||||||
| ) | ||||||||||||
| return response | ||||||||||||
|
|
@@ -152,4 +166,4 @@ async def healthcheck() -> Dict[str, str]: | |||||||||||
| @app.get("/metrics") | ||||||||||||
| async def metrics() -> Response: | ||||||||||||
| # Expose Prometheus metrics including default process/python collectors | ||||||||||||
| return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST) | ||||||||||||
| return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST) | ||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧹 Nitpick | 🔵 Trivial
Consider exporter timeouts and TLS options
Exporters can hang on network issues. Provide a bounded timeout and, if applicable, certificate configuration via env to avoid stalls.
Example:
Repeat analogously for metrics kwargs.
Also applies to: 65-85
🤖 Prompt for AI Agents