Skip to content

Commit dbeceb8

Browse files
authored
fix(langchain): langgraph application crash due to context detach (#3256)
1 parent f706a9f commit dbeceb8

File tree

2 files changed

+88
-27
lines changed

2 files changed

+88
-27
lines changed

packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/__init__.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,13 @@ def __call__(
242242

243243
# In legacy chains like LLMChain, suppressing model instrumentations
244244
# within create_llm_span doesn't work, so this should help as a fallback
245-
context_api.attach(
246-
context_api.set_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY, True)
247-
)
245+
try:
246+
context_api.attach(
247+
context_api.set_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY, True)
248+
)
249+
except Exception:
250+
# If context setting fails, continue without suppression
251+
# This is not critical for core functionality
252+
pass
248253

249254
return wrapped(*args, **kwargs)

packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py

Lines changed: 80 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -187,18 +187,57 @@ def _end_span(self, span: Span, run_id: UUID) -> None:
187187
span.end()
188188
token = self.spans[run_id].token
189189
if token:
190-
try:
191-
# Use the runtime context directly to avoid logging from context_api.detach()
192-
from opentelemetry.context import _RUNTIME_CONTEXT
193-
_RUNTIME_CONTEXT.detach(token)
194-
except (ValueError, RuntimeError, Exception):
195-
# Context detach can fail in async scenarios when tokens are created in different contexts
196-
# This includes ValueError, RuntimeError, and other context-related exceptions
197-
# This is expected behavior and doesn't affect the correct span hierarchy
198-
pass
190+
self._safe_detach_context(token)
199191

200192
del self.spans[run_id]
201193

194+
def _safe_attach_context(self, span: Span):
195+
"""
196+
Safely attach span to context, handling potential failures in async scenarios.
197+
198+
Returns the context token for later detachment, or None if attachment fails.
199+
"""
200+
try:
201+
return context_api.attach(set_span_in_context(span))
202+
except Exception:
203+
# Context attachment can fail in some edge cases, particularly in
204+
# complex async scenarios or when context is corrupted.
205+
# Return None to indicate no token needs to be detached later.
206+
return None
207+
208+
def _safe_detach_context(self, token):
209+
"""
210+
Safely detach context token without causing application crashes.
211+
212+
This method implements a fail-safe approach to context detachment that handles
213+
all known edge cases in async/concurrent scenarios where context tokens may
214+
become invalid or be detached in different execution contexts.
215+
216+
We use the runtime context directly to avoid logging errors from context_api.detach()
217+
"""
218+
if not token:
219+
return
220+
221+
try:
222+
# Use the runtime context directly to avoid error logging from context_api.detach()
223+
from opentelemetry.context import _RUNTIME_CONTEXT
224+
225+
_RUNTIME_CONTEXT.detach(token)
226+
except Exception:
227+
# Context detach can fail in async scenarios when tokens are created in different contexts
228+
# This includes ValueError, RuntimeError, and other context-related exceptions
229+
# This is expected behavior and doesn't affect the correct span hierarchy
230+
#
231+
# Common scenarios where this happens:
232+
# 1. Token created in one async task/thread, detached in another
233+
# 2. Context was already detached by another process
234+
# 3. Token became invalid due to context switching
235+
# 4. Race conditions in highly concurrent scenarios
236+
#
237+
# This is safe to ignore as the span itself was properly ended
238+
# and the tracing data is correctly captured.
239+
pass
240+
202241
def _create_span(
203242
self,
204243
run_id: UUID,
@@ -220,12 +259,17 @@ def _create_span(
220259
for k, v in metadata.items()
221260
if v is not None
222261
}
223-
context_api.attach(
224-
context_api.set_value(
225-
"association_properties",
226-
{**current_association_properties, **sanitized_metadata},
262+
try:
263+
context_api.attach(
264+
context_api.set_value(
265+
"association_properties",
266+
{**current_association_properties, **sanitized_metadata},
267+
)
227268
)
228-
)
269+
except Exception:
270+
# If setting association properties fails, continue without them
271+
# This doesn't affect the core span functionality
272+
pass
229273

230274
if parent_run_id is not None and parent_run_id in self.spans:
231275
span = self.tracer.start_span(
@@ -236,7 +280,7 @@ def _create_span(
236280
else:
237281
span = self.tracer.start_span(span_name, kind=kind)
238282

239-
token = context_api.attach(set_span_in_context(span))
283+
token = self._safe_attach_context(span)
240284

241285
_set_span_attribute(span, SpanAttributes.TRACELOOP_WORKFLOW_NAME, workflow_name)
242286
_set_span_attribute(span, SpanAttributes.TRACELOOP_ENTITY_PATH, entity_path)
@@ -317,9 +361,13 @@ def _create_llm_span(
317361

318362
# we already have an LLM span by this point,
319363
# so skip any downstream instrumentation from here
320-
token = context_api.attach(
321-
context_api.set_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY, True)
322-
)
364+
try:
365+
token = context_api.attach(
366+
context_api.set_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY, True)
367+
)
368+
except Exception:
369+
# If context setting fails, continue without suppression token
370+
token = None
323371

324372
self.spans[run_id] = SpanHolder(
325373
span, token, None, [], workflow_name, None, entity_path
@@ -411,11 +459,15 @@ def on_chain_end(
411459

412460
self._end_span(span, run_id)
413461
if parent_run_id is None:
414-
context_api.attach(
415-
context_api.set_value(
416-
SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY, False
462+
try:
463+
context_api.attach(
464+
context_api.set_value(
465+
SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY, False
466+
)
417467
)
418-
)
468+
except Exception:
469+
# If context reset fails, it's not critical for functionality
470+
pass
419471

420472
@dont_throw
421473
def on_chat_model_start(
@@ -513,8 +565,12 @@ def on_llm_end(
513565
if model_name is None:
514566
model_name = extract_model_name_from_response_metadata(response)
515567
if model_name is None and hasattr(context_api, "get_value"):
516-
association_properties = context_api.get_value("association_properties") or {}
517-
model_name = _extract_model_name_from_association_metadata(association_properties)
568+
association_properties = (
569+
context_api.get_value("association_properties") or {}
570+
)
571+
model_name = _extract_model_name_from_association_metadata(
572+
association_properties
573+
)
518574
token_usage = (response.llm_output or {}).get("token_usage") or (
519575
response.llm_output or {}
520576
).get("usage")

0 commit comments

Comments
 (0)