Skip to content

Commit 0e05bef

Browse files
committed
feat: capture langchain message content by default
1 parent eb20620 commit 0e05bef

File tree

5 files changed

+190
-3
lines changed

5 files changed

+190
-3
lines changed

instrumentation-genai/opentelemetry-instrumentation-langchain/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111
([#3665](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3665))
1212
- Align LangChain instrumentation with GenAI schema 1.37.0 and add unit coverage for updated metadata.
1313
([#3813](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3813))
14+
- Capture GenAI input/output messages on spans by default with opt-out control.
15+
([#3813](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3813))

instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
---
3737
"""
3838

39+
import os
3940
from typing import Any, Callable, Collection
4041

4142
from langchain_core.callbacks import BaseCallbackHandler # type: ignore
@@ -72,6 +73,7 @@ def _instrument(self, **kwargs: Any):
7273
Enable Langchain instrumentation.
7374
"""
7475
tracer_provider = kwargs.get("tracer_provider")
76+
capture_messages = self._resolve_capture_messages(kwargs)
7577
tracer = get_tracer(
7678
__name__,
7779
__version__,
@@ -81,6 +83,7 @@ def _instrument(self, **kwargs: Any):
8183

8284
otel_callback_handler = OpenTelemetryLangChainCallbackHandler(
8385
tracer=tracer,
86+
capture_messages=capture_messages,
8487
)
8588

8689
wrap_function_wrapper(
@@ -95,6 +98,18 @@ def _uninstrument(self, **kwargs: Any):
9598
"""
9699
unwrap("langchain_core.callbacks.base.BaseCallbackManager", "__init__")
97100

101+
def _resolve_capture_messages(self, kwargs: dict[str, Any]) -> bool:
102+
if "capture_messages" in kwargs:
103+
return bool(kwargs["capture_messages"])
104+
105+
env_value = os.getenv(
106+
"OTEL_INSTRUMENTATION_LANGCHAIN_CAPTURE_MESSAGES"
107+
)
108+
if env_value is not None:
109+
return env_value.lower() in ("1", "true", "yes", "on")
110+
111+
return True
112+
98113

99114
class _BaseCallbackManagerInitWrapper:
100115
"""

instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/callback_handler.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from __future__ import annotations
1616

17+
import json
1718
from typing import Any
1819
from urllib.parse import urlparse
1920
from uuid import UUID
@@ -68,12 +69,14 @@ class OpenTelemetryLangChainCallbackHandler(BaseCallbackHandler): # type: ignor
6869
def __init__(
6970
self,
7071
tracer: Tracer,
72+
capture_messages: bool,
7173
) -> None:
7274
super().__init__() # type: ignore
7375

7476
self.span_manager = _SpanManager(
7577
tracer=tracer,
7678
)
79+
self._capture_messages = capture_messages
7780

7881
def on_chat_model_start(
7982
self,
@@ -113,6 +116,13 @@ def on_chat_model_start(
113116

114117
self._apply_request_attributes(span, params, metadata)
115118

119+
if self._capture_messages and messages:
120+
serialized_messages = self._serialize_input_messages(messages)
121+
span.set_attribute(
122+
GenAI.GEN_AI_INPUT_MESSAGES,
123+
self._serialize_to_json(serialized_messages),
124+
)
125+
116126
def _resolve_provider(
117127
self, llm_name: str | None, metadata: dict[str, Any] | None
118128
) -> str | None:
@@ -295,6 +305,59 @@ def _extract_output_type(self, params: dict[str, Any]) -> str | None:
295305

296306
return mapping.get(lowered)
297307

308+
def _serialize_input_messages(
    self, messages: list[list[BaseMessage]]
) -> list[dict[str, Any]]:
    """Flatten the batched input conversations into one serialized list."""
    return [
        self._serialize_message(message)
        for conversation in messages
        for message in conversation
    ]
316+
317+
def _serialize_output_messages(
    self, response: LLMResult
) -> list[dict[str, Any]]:
    """Serialize every generation message found on *response*.

    Generations lacking a ``message`` attribute are skipped.
    """
    batches = getattr(response, "generations", [])  # type: ignore
    return [
        self._serialize_message(found)
        for batch in batches
        for item in batch
        if (found := getattr(item, "message", None)) is not None
    ]
328+
329+
def _serialize_message(self, message: BaseMessage) -> dict[str, Any]:
330+
payload: dict[str, Any] = {
331+
"type": getattr(message, "type", message.__class__.__name__),
332+
"content": getattr(message, "content", None),
333+
}
334+
for attr in (
335+
"additional_kwargs",
336+
"response_metadata",
337+
"tool_call_id",
338+
"tool_calls",
339+
"usage_metadata",
340+
"id",
341+
"name",
342+
):
343+
value = getattr(message, attr, None)
344+
if value:
345+
payload[attr] = value
346+
return payload
347+
348+
def _serialize_to_json(self, payload: Any) -> str:
    """Render *payload* as JSON, falling back to ``_json_default`` for
    values the encoder cannot handle natively."""
    encoded = json.dumps(payload, default=self._json_default)
    return encoded
350+
351+
@staticmethod
352+
def _json_default(value: Any) -> Any:
353+
if isinstance(value, (str, int, float, bool)) or value is None:
354+
return value
355+
if isinstance(value, dict):
356+
return value
357+
if isinstance(value, (list, tuple)):
358+
return list(value)
359+
return getattr(value, "__dict__", str(value))
360+
298361
def on_llm_end(
299362
self,
300363
response: LLMResult, # type: ignore [reportUnknownParameterType]
@@ -379,6 +442,14 @@ def on_llm_end(
379442
OPENAI_RESPONSE_SYSTEM_FINGERPRINT, system_fingerprint
380443
)
381444

445+
if self._capture_messages:
446+
serialized_outputs = self._serialize_output_messages(response)
447+
if serialized_outputs:
448+
span.set_attribute(
449+
GenAI.GEN_AI_OUTPUT_MESSAGES,
450+
self._serialize_to_json(serialized_outputs),
451+
)
452+
382453
# End the LLM span
383454
self.span_manager.end_span(run_id)
384455

instrumentation-genai/opentelemetry-instrumentation-langchain/tests/test_callback_handler.py

Lines changed: 100 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import json
34
from dataclasses import dataclass
45
from typing import Any
56
from uuid import uuid4
@@ -24,12 +25,13 @@
2425
)
2526

2627

27-
def _create_handler(capture_messages: bool = True):
    """Build a callback handler wired to an in-memory span exporter.

    Returns the ``(handler, exporter)`` pair used by the tests below.
    """
    exporter = InMemorySpanExporter()
    provider = TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(exporter))
    return (
        OpenTelemetryLangChainCallbackHandler(
            tracer=provider.get_tracer(__name__),
            capture_messages=capture_messages,
        ),
        exporter,
    )
3537

@@ -77,6 +79,22 @@ class _DummyLLMResult:
7779
llm_output: dict[str, Any]
7880

7981

82+
@dataclass
83+
class _DummyGeneration:
84+
message: Any
85+
generation_info: dict[str, Any] | None = None
86+
87+
88+
@dataclass
89+
class _FakeMessage:
90+
content: str
91+
type: str
92+
additional_kwargs: dict[str, Any] | None = None
93+
response_metadata: dict[str, Any] | None = None
94+
usage_metadata: dict[str, Any] | None = None
95+
id: str | None = None
96+
97+
8098
def test_llm_end_sets_response_metadata():
8199
handler, exporter = _create_handler()
82100
run_id = uuid4()
@@ -134,3 +152,83 @@ def test_choice_count_not_set_when_one():
134152
handler.span_manager.end_span(run_id)
135153
span = exporter.get_finished_spans()[0]
136154
assert GenAI.GEN_AI_REQUEST_CHOICE_COUNT not in span.attributes
155+
156+
157+
def test_capture_messages_sets_attributes_by_default():
    """With default settings, input and output messages are recorded as
    JSON-encoded span attributes."""
    handler, exporter = _create_handler()
    run_id = uuid4()

    prompt = _FakeMessage(content="hello", type="human", id="m1")
    handler.on_chat_model_start(
        serialized={"name": "ChatOpenAI"},
        messages=[[prompt]],
        run_id=run_id,
        tags=None,
        parent_run_id=None,
        metadata={"ls_model_name": "gpt-4"},
        invocation_params={"params": {"model": "gpt-4"}},
    )

    reply = _FakeMessage(content="result", type="ai", id="m2")
    handler.on_llm_end(
        _DummyLLMResult(
            generations=[[_DummyGeneration(message=reply)]],
            llm_output={},
        ),
        run_id=run_id,
        parent_run_id=None,
    )

    span = exporter.get_finished_spans()[0]
    # The attributes hold JSON strings; decode before asserting on content.
    input_payload = json.loads(span.attributes[GenAI.GEN_AI_INPUT_MESSAGES])
    output_payload = json.loads(span.attributes[GenAI.GEN_AI_OUTPUT_MESSAGES])
    assert input_payload[0]["content"] == "hello"
    assert output_payload[0]["content"] == "result"
203+
204+
205+
def test_capture_messages_can_be_disabled():
    """Opting out must leave both message-content attributes off the span."""
    handler, exporter = _create_handler(capture_messages=False)
    run_id = uuid4()

    handler.on_chat_model_start(
        serialized={"name": "ChatOpenAI"},
        messages=[[_FakeMessage(content="hello", type="human")]],
        run_id=run_id,
        tags=None,
        parent_run_id=None,
        metadata={"ls_model_name": "gpt-4"},
        invocation_params={"params": {"model": "gpt-4"}},
    )

    handler.on_llm_end(
        _DummyLLMResult(generations=[], llm_output={}),
        run_id=run_id,
        parent_run_id=None,
    )

    span = exporter.get_finished_spans()[0]
    assert GenAI.GEN_AI_INPUT_MESSAGES not in span.attributes
    assert GenAI.GEN_AI_OUTPUT_MESSAGES not in span.attributes

instrumentation-genai/opentelemetry-instrumentation-langchain/tests/test_llm_call.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,8 @@ def test_azure_chat_sets_provider_and_server_attributes():
188188
provider = TracerProvider()
189189
provider.add_span_processor(SimpleSpanProcessor(exporter))
190190
handler = OpenTelemetryLangChainCallbackHandler(
191-
provider.get_tracer(__name__)
191+
provider.get_tracer(__name__),
192+
capture_messages=True,
192193
)
193194

194195
run_id = uuid4()

0 commit comments

Comments
 (0)