88import hug
99import os
1010import re
11+ import time
1112from urllib .parse import urlparse
1213import requests
1314from falcon import HTTP_401 , HTTP_400 , HTTP_OK , HTTP_500 , Response
5051_OTEL_TRACER = None
5152_OTEL_TRACER_PROVIDER = None
5253_OTEL_INIT_ERROR = None
54+ _OTEL_DISABLED = False
55+ _OTEL_DISABLED_REASON = None
56+
def _disable_otel(reason):
    """Latch the module-level OTel "disabled" flag and remember why.

    Only the first call has any effect: it sets ``_OTEL_DISABLED`` to True,
    stores ``reason`` in ``_OTEL_DISABLED_REASON`` and emits one log line.
    Later calls return without touching the stored reason.

    Args:
        reason: Short text describing why tracing is being turned off.
    """
    global _OTEL_DISABLED, _OTEL_DISABLED_REASON

    if not _OTEL_DISABLED:
        _OTEL_DISABLED, _OTEL_DISABLED_REASON = True, reason
        try:
            cla.log.info(f"LG:otel-datadog-disabled reason={ reason } ")
        except Exception:
            # Logging is best-effort: a failure to log must not propagate
            # out of the telemetry kill switch.
            pass
5367
5468# --- Path sanitizer regexes (mirror ./utils/count_apis.sh, but keep /vN versions intact) ---
5569_RE_MULTI_SLASH = re .compile (r"/{2,}" )
@@ -158,6 +172,11 @@ def _init_otel_datadog() -> None:
158172 """
159173 global _OTEL_TRACER , _OTEL_TRACER_PROVIDER , _OTEL_INIT_ERROR
160174
175+ if _OTEL_DISABLED :
176+ return
177+
178+
179+
161180 if _OTEL_TRACER is not None or _OTEL_INIT_ERROR is not None :
162181 return
163182 try :
@@ -169,10 +188,7 @@ def _init_otel_datadog() -> None:
169188 from opentelemetry .exporter .otlp .proto .http .trace_exporter import OTLPSpanExporter
170189 except Exception as e :
171190 _OTEL_INIT_ERROR = e
172- try :
173- cla .log .info (f"LG:otel-datadog-init-failed err={ e } " )
174- except Exception :
175- pass
191+ _disable_otel (f"init-import-1 err={ e } " )
176192 return
177193
178194 try :
@@ -183,10 +199,30 @@ def _init_otel_datadog() -> None:
183199
184200 endpoint = _build_otlp_traces_endpoint ()
185201
186- exporter = OTLPSpanExporter (
187- endpoint = endpoint ,
188- timeout = 2 , # seconds; keep aligned with Go exporter timeout intent
189- )
202+ exporter = OTLPSpanExporter (endpoint = endpoint , timeout = 0.5 )
203+
204+ # Wrap exporter so any export failure disables tracing for this container
205+ from opentelemetry .sdk .trace .export import SpanExportResult
class _FailFastExporter:
    """Span-exporter wrapper that turns tracing off after the first failed export.

    It forwards to the wrapped exporter until ``_OTEL_DISABLED`` is set; from
    then on every ``export`` call returns ``SpanExportResult.FAILURE`` without
    calling the wrapped exporter.
    """

    def __init__(self, inner):
        # The wrapped exporter that performs the actual export.
        self._inner = inner

    def export(self, spans):
        """Forward ``spans`` to the wrapped exporter, disabling OTel on any failure."""
        if _OTEL_DISABLED:
            return SpanExportResult.FAILURE

        try:
            outcome = self._inner.export(spans)
        except Exception as exc:
            # An exception from the wrapped exporter counts as a failed export.
            _disable_otel(f"export err={ exc } ")
            outcome = SpanExportResult.FAILURE
        else:
            if outcome != SpanExportResult.SUCCESS:
                _disable_otel(f"export result={ outcome } ")
        return outcome

    def shutdown(self):
        """Shut the wrapped exporter down; errors are swallowed and yield None."""
        try:
            return self._inner.shutdown()
        except Exception:
            return None
225+
190226
191227 resource = Resource .create ({
192228 # Vendor-neutral resource attrs (Datadog maps these automatically).
@@ -196,21 +232,18 @@ def _init_otel_datadog() -> None:
196232 })
197233
198234 provider = TracerProvider (resource = resource )
199- # In Lambda, synchronous export is the safest default (no custom threads, no buffering loss on freeze) .
200- provider .add_span_processor (SimpleSpanProcessor (exporter ))
235+ # In Lambda, synchronous export is safest; FailFastExporter prevents repeated latency on failure .
236+ provider .add_span_processor (SimpleSpanProcessor (_FailFastExporter ( exporter ) ))
201237
202238 otel_trace .set_tracer_provider (provider )
203239
204240 _OTEL_TRACER_PROVIDER = provider
205241 _OTEL_TRACER = otel_trace .get_tracer ("easycla-http" )
206242 except Exception as e :
207243 _OTEL_INIT_ERROR = e
208- try :
209- cla .log .info (f"LG:otel-datadog-init-failed err={ e } " )
210- except Exception :
211- pass
244+ _disable_otel (f"init-import-2 err={ e } " )
212245
213- def _parse_http_status_code (status ) -> int | None :
246+ def _parse_http_status_code (status ):
214247 """
215248 Falcon typically stores response.status like "200 OK".
216249 Return int status code or None.
@@ -318,24 +351,15 @@ def _otel_end_request_span(request, response) -> None:
318351 if span is None :
319352 return
320353
321- from opentelemetry .trace import Status , StatusCode
322-
323354 status_code = _parse_http_status_code (getattr (response , "status" , None ))
324355 if status_code is not None :
325356 span .set_attribute ("http.status_code" , status_code )
326357 # Mark 5xx as errors (4xx are usually client errors, not service faults)
327358 if status_code >= 500 :
359+ from opentelemetry .trace import Status , StatusCode
328360 span .set_status (Status (StatusCode .ERROR ))
329361
330362 span .end ()
331-
332- # With SimpleSpanProcessor this is effectively a no-op, but keeps behavior safe
333- # if the processor changes in the future.
334- try :
335- if _OTEL_TRACER_PROVIDER is not None :
336- _OTEL_TRACER_PROVIDER .force_flush (timeout_millis = 2000 )
337- except Exception :
338- pass
339363 except Exception as e :
340364 try :
341365 if route is None :
@@ -463,6 +487,10 @@ def check_auth(request=None, **kwargs):
def handle_auth_error(exception, response=None, request=None, **kwargs):
    """Handles authentication errors.

    Marks the response as 401 and, when OTel/Datadog API logging is enabled,
    closes the request span, because the response middleware may not be
    invoked for exceptions.

    Args:
        exception: The raised authentication error; its ``response`` attribute
            is returned to the caller.
        response: The outgoing response object, if the framework supplied one.
        request: The incoming request object. Declared by name so it is bound
            inside this function; the previous code read an undefined
            ``request`` and raised ``NameError``.
            NOTE(review): presumably the framework injects ``request`` only
            for handlers that declare it - confirm against the hug docs.
        **kwargs: Any additional keyword arguments passed by the framework.

    Returns:
        ``exception.response``.
    """
    if response is not None:
        response.status = HTTP_401

    # Fall back to kwargs in case a caller passes the request only that way.
    if request is None:
        request = kwargs.get("request")

    # Ensure OTel span closes even if response middleware isn't invoked for exceptions.
    # Without a request object there is nothing to close, so skip quietly.
    if request is not None and _enabled_by_env_or_stage(
        "OTEL_DATADOG_API_LOGGING", default_by_stage=(True, True)
    ):
        _otel_end_request_span(request, response)
    return exception.response
467495
468496
0 commit comments