Skip to content

Commit 0fde408

Browse files
added langfuse logging for responses api (#14597)
* Added Langfuse logging for the Responses API
* Added tests
1 parent f507bf8 commit 0fde408

File tree

2 files changed

+151
-1
lines changed

2 files changed

+151
-1
lines changed

litellm/integrations/langfuse/langfuse.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from litellm.llms.custom_httpx.http_handler import _get_httpx_client
1616
from litellm.secret_managers.main import str_to_bool
1717
from litellm.types.integrations.langfuse import *
18-
from litellm.types.llms.openai import HttpxBinaryResponseContent
18+
from litellm.types.llms.openai import HttpxBinaryResponseContent, ResponsesAPIResponse
1919
from litellm.types.utils import (
2020
EmbeddingResponse,
2121
ImageResponse,
@@ -196,6 +196,7 @@ def log_event_on_langfuse(
196196
TranscriptionResponse,
197197
RerankResponse,
198198
HttpxBinaryResponseContent,
199+
ResponsesAPIResponse,
199200
],
200201
start_time: Optional[datetime] = None,
201202
end_time: Optional[datetime] = None,
@@ -305,6 +306,7 @@ def _get_langfuse_input_output_content(
305306
TranscriptionResponse,
306307
RerankResponse,
307308
HttpxBinaryResponseContent,
309+
ResponsesAPIResponse,
308310
],
309311
prompt: dict,
310312
level: str,
@@ -369,6 +371,11 @@ def _get_langfuse_input_output_content(
369371
):
370372
input = prompt
371373
output = response_obj.results
374+
elif response_obj is not None and isinstance(
375+
response_obj, litellm.ResponsesAPIResponse
376+
):
377+
input = prompt
378+
output = self._get_responses_api_content_for_langfuse(response_obj)
372379
elif (
373380
kwargs.get("call_type") is not None
374381
and kwargs.get("call_type") == "_arealtime"
@@ -768,6 +775,19 @@ def _get_text_completion_content_for_langfuse(
768775
else:
769776
return None
770777

778+
@staticmethod
def _get_responses_api_content_for_langfuse(
    response_obj: ResponsesAPIResponse,
):
    """
    Pick the value that is logged to Langfuse as the output of a Responses API call.

    Args:
        response_obj: The Responses API result whose ``output`` attribute is read.

    Returns:
        ``response_obj.output`` unchanged when the attribute exists and is
        truthy; ``None`` when it is missing, ``None`` or empty.
    """
    # The items are handed over untouched and no assumption is made about
    # their type (the test suite builds `output` from message-style dicts).
    output_items = getattr(response_obj, "output", None)
    return output_items if output_items else None
790+
771791
@staticmethod
772792
def _get_langfuse_tags(
773793
standard_logging_object: Optional[StandardLoggingPayload],

tests/test_litellm/integrations/test_langfuse_otel.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
from litellm.integrations.langfuse.langfuse_otel import LangfuseOtelLogger
88
from litellm.types.integrations.langfuse_otel import LangfuseOtelConfig
9+
from litellm.types.llms.openai import ResponsesAPIResponse
10+
from datetime import datetime
911

1012

1113
class TestLangfuseOtelIntegration:
@@ -241,6 +243,134 @@ def test_get_langfuse_otel_config_with_otel_host_priority(self):
241243
_ = LangfuseOtelLogger.get_langfuse_otel_config()
242244

243245
assert os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT") == "https://otel-host.com/api/public/otel"
246+
247+
248+
class TestLangfuseOtelResponsesAPI:
    """Test suite for Langfuse OTEL integration with ResponsesAPI.

    Covers three things for kwargs whose ``call_type`` is ``"responses"``:
    attribute setting on a span for a ``ResponsesAPIResponse``, metadata
    extraction from ``litellm_params``, and the mapping of metadata keys to
    Langfuse span attributes.
    """

    def test_langfuse_otel_with_responses_api(self):
        """Test that Langfuse OTEL logger works with ResponsesAPI responses and logs metadata."""
        # Minimal ResponsesAPIResponse carrying a single message output item.
        mock_response = ResponsesAPIResponse(
            id="response-123",
            created_at=1234567890,
            output=[
                {
                    "type": "message",
                    "content": [{"type": "text", "text": "Hello from responses API"}],
                }
            ],
            parallel_tool_calls=False,
            tool_choice="auto",
            tools=[],
            top_p=1.0,
        )

        # Metadata that is expected to surface as Langfuse span attributes.
        test_metadata = {
            "user_id": "test123",
            "session_id": "abc456",
            "custom_field": "test_value",
            "generation_name": "responses_test_generation",
            "trace_name": "responses_api_trace",
        }

        kwargs = {
            "call_type": "responses",
            "messages": [{"role": "user", "content": "Hello"}],
            "model": "gpt-4o",
            "optional_params": {},
            "litellm_params": {"metadata": test_metadata},
        }

        mock_span = MagicMock()

        with patch("litellm.integrations.arize._utils.set_attributes") as mock_set_attributes:
            with patch("litellm.integrations.arize._utils.safe_set_attribute") as mock_safe_set_attribute:
                logger = LangfuseOtelLogger()
                logger.set_langfuse_otel_attributes(mock_span, kwargs, mock_response)

                # The general attributes must be set exactly once, with the
                # same span / kwargs / response that were passed in.
                mock_set_attributes.assert_called_once_with(mock_span, kwargs, mock_response)

                # Langfuse-specific attributes derived from the metadata.
                mock_safe_set_attribute.assert_any_call(
                    mock_span, "langfuse.generation.name", "responses_test_generation"
                )
                mock_safe_set_attribute.assert_any_call(
                    mock_span, "langfuse.trace.name", "responses_api_trace"
                )

    def test_responses_api_metadata_extraction(self):
        """Test that metadata is correctly extracted from ResponsesAPI kwargs."""
        test_metadata = {
            "user_id": "responses_user_123",
            "session_id": "responses_session_456",
            "custom_metadata": {"key": "value"},
            "generation_name": "responses_generation",
            "trace_id": "custom_trace_id",
        }

        kwargs = {
            "call_type": "responses",
            "model": "gpt-4o",
            "litellm_params": {"metadata": test_metadata},
        }

        extracted_metadata = LangfuseOtelLogger._extract_langfuse_metadata(kwargs)

        # Every supplied key must round-trip; the extracted dict may carry
        # additional fields (e.g. from header enrichment), so only the
        # supplied keys are compared.
        for key, value in test_metadata.items():
            assert extracted_metadata[key] == value

        assert extracted_metadata["user_id"] == "responses_user_123"
        assert extracted_metadata["generation_name"] == "responses_generation"
        assert extracted_metadata["trace_id"] == "custom_trace_id"

    def test_responses_api_langfuse_specific_attributes(self):
        """Test that ResponsesAPI metadata maps correctly to Langfuse OTEL attributes."""
        metadata = {
            "generation_name": "responses_gen",
            "generation_id": "resp_gen_123",
            "trace_name": "responses_trace",
            "trace_user_id": "resp_user_456",
            "session_id": "resp_session_789",
            "tags": ["responses", "api", "test"],
            "trace_metadata": {"source": "responses_api", "version": "1.0"},
        }

        kwargs = {
            "call_type": "responses",
            "litellm_params": {"metadata": metadata},
        }

        mock_span = MagicMock()

        with patch("litellm.integrations.arize._utils.safe_set_attribute") as mock_safe_set_attribute:
            LangfuseOtelLogger._set_langfuse_specific_attributes(mock_span, kwargs)

            from litellm.types.integrations.langfuse_otel import LangfuseSpanAttributes

            # List and dict metadata values are expected as JSON strings.
            expected_calls = [
                (mock_span, LangfuseSpanAttributes.GENERATION_NAME.value, "responses_gen"),
                (mock_span, LangfuseSpanAttributes.GENERATION_ID.value, "resp_gen_123"),
                (mock_span, LangfuseSpanAttributes.TRACE_NAME.value, "responses_trace"),
                (mock_span, LangfuseSpanAttributes.TRACE_USER_ID.value, "resp_user_456"),
                (mock_span, LangfuseSpanAttributes.SESSION_ID.value, "resp_session_789"),
                (mock_span, LangfuseSpanAttributes.TAGS.value, json.dumps(["responses", "api", "test"])),
                (
                    mock_span,
                    LangfuseSpanAttributes.TRACE_METADATA.value,
                    json.dumps({"source": "responses_api", "version": "1.0"}),
                ),
            ]

            for expected_call in expected_calls:
                mock_safe_set_attribute.assert_any_call(*expected_call)
373+
244374

245375

246376

0 commit comments

Comments
 (0)