@@ -187,18 +187,57 @@ def _end_span(self, span: Span, run_id: UUID) -> None:
187
187
span .end ()
188
188
token = self .spans [run_id ].token
189
189
if token :
190
- try :
191
- # Use the runtime context directly to avoid logging from context_api.detach()
192
- from opentelemetry .context import _RUNTIME_CONTEXT
193
- _RUNTIME_CONTEXT .detach (token )
194
- except (ValueError , RuntimeError , Exception ):
195
- # Context detach can fail in async scenarios when tokens are created in different contexts
196
- # This includes ValueError, RuntimeError, and other context-related exceptions
197
- # This is expected behavior and doesn't affect the correct span hierarchy
198
- pass
190
+ self ._safe_detach_context (token )
199
191
200
192
del self .spans [run_id ]
201
193
194
def _safe_attach_context(self, span: Span):
    """
    Make ``span`` the current span in the context without ever raising.

    The span is wrapped in a context via ``set_span_in_context`` and that
    context is attached through ``context_api.attach``.

    Returns:
        The token produced by the attach call, to be handed to the matching
        detach later, or ``None`` when attaching raised any ``Exception``.
    """
    try:
        span_context = set_span_in_context(span)
        attach_token = context_api.attach(span_context)
    except Exception:
        # Attaching may fail in some edge cases (complex async flows, a
        # corrupted context). A None token tells the caller that there is
        # nothing to detach afterwards.
        attach_token = None
    return attach_token
207
+
208
+ def _safe_detach_context (self , token ):
209
+ """
210
+ Safely detach context token without causing application crashes.
211
+
212
+ This method implements a fail-safe approach to context detachment that handles
213
+ all known edge cases in async/concurrent scenarios where context tokens may
214
+ become invalid or be detached in different execution contexts.
215
+
216
+ We use the runtime context directly to avoid logging errors from context_api.detach()
217
+ """
218
+ if not token :
219
+ return
220
+
221
+ try :
222
+ # Use the runtime context directly to avoid error logging from context_api.detach()
223
+ from opentelemetry .context import _RUNTIME_CONTEXT
224
+
225
+ _RUNTIME_CONTEXT .detach (token )
226
+ except Exception :
227
+ # Context detach can fail in async scenarios when tokens are created in different contexts
228
+ # This includes ValueError, RuntimeError, and other context-related exceptions
229
+ # This is expected behavior and doesn't affect the correct span hierarchy
230
+ #
231
+ # Common scenarios where this happens:
232
+ # 1. Token created in one async task/thread, detached in another
233
+ # 2. Context was already detached by another process
234
+ # 3. Token became invalid due to context switching
235
+ # 4. Race conditions in highly concurrent scenarios
236
+ #
237
+ # This is safe to ignore as the span itself was properly ended
238
+ # and the tracing data is correctly captured.
239
+ pass
240
+
202
241
def _create_span (
203
242
self ,
204
243
run_id : UUID ,
@@ -220,12 +259,17 @@ def _create_span(
220
259
for k , v in metadata .items ()
221
260
if v is not None
222
261
}
223
- context_api .attach (
224
- context_api .set_value (
225
- "association_properties" ,
226
- {** current_association_properties , ** sanitized_metadata },
262
+ try :
263
+ context_api .attach (
264
+ context_api .set_value (
265
+ "association_properties" ,
266
+ {** current_association_properties , ** sanitized_metadata },
267
+ )
227
268
)
228
- )
269
+ except Exception :
270
+ # If setting association properties fails, continue without them
271
+ # This doesn't affect the core span functionality
272
+ pass
229
273
230
274
if parent_run_id is not None and parent_run_id in self .spans :
231
275
span = self .tracer .start_span (
@@ -236,7 +280,7 @@ def _create_span(
236
280
else :
237
281
span = self .tracer .start_span (span_name , kind = kind )
238
282
239
- token = context_api . attach ( set_span_in_context ( span ) )
283
+ token = self . _safe_attach_context ( span )
240
284
241
285
_set_span_attribute (span , SpanAttributes .TRACELOOP_WORKFLOW_NAME , workflow_name )
242
286
_set_span_attribute (span , SpanAttributes .TRACELOOP_ENTITY_PATH , entity_path )
@@ -317,9 +361,13 @@ def _create_llm_span(
317
361
318
362
# we already have an LLM span by this point,
319
363
# so skip any downstream instrumentation from here
320
- token = context_api .attach (
321
- context_api .set_value (SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY , True )
322
- )
364
+ try :
365
+ token = context_api .attach (
366
+ context_api .set_value (SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY , True )
367
+ )
368
+ except Exception :
369
+ # If context setting fails, continue without suppression token
370
+ token = None
323
371
324
372
self .spans [run_id ] = SpanHolder (
325
373
span , token , None , [], workflow_name , None , entity_path
@@ -411,11 +459,15 @@ def on_chain_end(
411
459
412
460
self ._end_span (span , run_id )
413
461
if parent_run_id is None :
414
- context_api .attach (
415
- context_api .set_value (
416
- SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY , False
462
+ try :
463
+ context_api .attach (
464
+ context_api .set_value (
465
+ SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY , False
466
+ )
417
467
)
418
- )
468
+ except Exception :
469
+ # If context reset fails, it's not critical for functionality
470
+ pass
419
471
420
472
@dont_throw
421
473
def on_chat_model_start (
@@ -513,8 +565,12 @@ def on_llm_end(
513
565
if model_name is None :
514
566
model_name = extract_model_name_from_response_metadata (response )
515
567
if model_name is None and hasattr (context_api , "get_value" ):
516
- association_properties = context_api .get_value ("association_properties" ) or {}
517
- model_name = _extract_model_name_from_association_metadata (association_properties )
568
+ association_properties = (
569
+ context_api .get_value ("association_properties" ) or {}
570
+ )
571
+ model_name = _extract_model_name_from_association_metadata (
572
+ association_properties
573
+ )
518
574
token_usage = (response .llm_output or {}).get ("token_usage" ) or (
519
575
response .llm_output or {}
520
576
).get ("usage" )
0 commit comments