Skip to content

Commit 64e812f

Browse files
hassiebp and yash025 authored
fix(client): setting OTEL span status as error on Langfuse error (#1387) (#1388)
Co-authored-by: Yashwanth <[email protected]>
1 parent 60748e8 commit 64e812f

File tree

2 files changed

+253
-0
lines changed

2 files changed

+253
-0
lines changed

langfuse/_client/span.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
from opentelemetry import trace as otel_trace_api
3333
from opentelemetry.util._decorator import _AgnosticContextManager
34+
from opentelemetry.trace.status import Status, StatusCode
3435

3536
from langfuse.model import PromptClient
3637

@@ -188,6 +189,8 @@ def __init__(
188189
self._otel_span.set_attributes(
189190
{k: v for k, v in attributes.items() if v is not None}
190191
)
192+
# Set OTEL span status if level is ERROR
193+
self._set_otel_span_status_if_error(level=level, status_message=status_message)
191194

192195
def end(self, *, end_time: Optional[int] = None) -> "LangfuseObservationWrapper":
193196
"""End the span, marking it as completed.
@@ -540,6 +543,28 @@ def _process_media_in_attribute(
540543

541544
return data
542545

546+
def _set_otel_span_status_if_error(
    self, *, level: Optional[SpanLevel] = None, status_message: Optional[str] = None
) -> None:
    """Mirror a Langfuse ERROR level onto the underlying OpenTelemetry span.

    When ``level`` equals ``"ERROR"`` and the wrapped OTEL span is still
    recording, the span status is set to ``StatusCode.ERROR`` with
    ``status_message`` as its description. For any other level (including
    ``None``) or a non-recording span, this is a no-op.

    This is best-effort: a failure while setting the status is logged at
    debug level and never propagated, so observation creation and update
    keep working.

    Args:
        level: The Langfuse observation level to check.
        status_message: Optional text used as the OTEL status description.
    """
    # Guard clause: nothing to do unless the level is ERROR and the span
    # still accepts updates. Short-circuits so is_recording() is only
    # consulted for ERROR levels.
    if level != "ERROR" or not self._otel_span.is_recording():
        return

    try:
        self._otel_span.set_status(
            Status(StatusCode.ERROR, description=status_message)
        )
    except Exception:
        # Deliberately do not re-raise: status propagation must not disrupt
        # the existing flow. Log instead of silently discarding the failure
        # so it remains diagnosable.
        import logging

        logging.getLogger(__name__).debug(
            "Failed to set OTEL span status to ERROR", exc_info=True
        )
567+
543568
def update(
544569
self,
545570
*,
@@ -636,6 +661,8 @@ def update(
636661
)
637662

638663
self._otel_span.set_attributes(attributes=attributes)
664+
# Set OTEL span status if level is ERROR
665+
self._set_otel_span_status_if_error(level=level, status_message=status_message)
639666

640667
return self
641668

tests/test_otel.py

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -944,6 +944,232 @@ def test_error_handling(self, langfuse_client, memory_exporter):
944944
== "Test error message"
945945
)
946946

947+
def test_error_level_in_span_creation(self, langfuse_client, memory_exporter):
    """A span started with level='ERROR' must export with OTEL status ERROR."""
    from opentelemetry.trace.status import StatusCode

    # Start and immediately finish a span that is in error state from the beginning.
    langfuse_client.start_span(
        name="create-error-span",
        level="ERROR",
        status_message="Initial error state",
    ).end()

    # Inspect the raw exported OTEL span.
    matching = [
        finished
        for finished in memory_exporter.get_finished_spans()
        if finished.name == "create-error-span"
    ]
    assert len(matching) == 1, "Expected one span"
    otel_status = matching[0].status

    # The OTEL status must carry both the error code and the message.
    assert otel_status.status_code == StatusCode.ERROR
    assert otel_status.description == "Initial error state"

    # The Langfuse-specific attributes must be present as well.
    exported_attrs = self.get_spans_by_name(memory_exporter, "create-error-span")[0][
        "attributes"
    ]
    assert exported_attrs[LangfuseOtelSpanAttributes.OBSERVATION_LEVEL] == "ERROR"
    assert (
        exported_attrs[LangfuseOtelSpanAttributes.OBSERVATION_STATUS_MESSAGE]
        == "Initial error state"
    )
976+
977+
def test_error_level_in_span_update(self, langfuse_client, memory_exporter):
    """Updating a span to level='ERROR' must export it with OTEL status ERROR."""
    from opentelemetry.trace.status import StatusCode

    # Begin with an INFO span, then switch it to ERROR before ending it.
    observed = langfuse_client.start_span(name="update-error-span", level="INFO")
    observed.update(level="ERROR", status_message="Updated to error state")
    observed.end()

    # Inspect the raw exported OTEL span.
    matching = [
        finished
        for finished in memory_exporter.get_finished_spans()
        if finished.name == "update-error-span"
    ]
    assert len(matching) == 1, "Expected one span"
    otel_status = matching[0].status

    # The OTEL status must reflect the update.
    assert otel_status.status_code == StatusCode.ERROR
    assert otel_status.description == "Updated to error state"

    # The Langfuse-specific attributes must be present as well.
    exported_attrs = self.get_spans_by_name(memory_exporter, "update-error-span")[0][
        "attributes"
    ]
    assert exported_attrs[LangfuseOtelSpanAttributes.OBSERVATION_LEVEL] == "ERROR"
    assert (
        exported_attrs[LangfuseOtelSpanAttributes.OBSERVATION_STATUS_MESSAGE]
        == "Updated to error state"
    )
1005+
1006+
def test_generation_error_level_in_creation(self, langfuse_client, memory_exporter):
    """A generation started with level='ERROR' must export with OTEL status ERROR."""
    from opentelemetry.trace.status import StatusCode

    # Start and immediately finish a generation that is in error state from the beginning.
    langfuse_client.start_generation(
        name="create-error-generation",
        model="gpt-4",
        level="ERROR",
        status_message="Generation failed during creation",
    ).end()

    # Inspect the raw exported OTEL span.
    matching = [
        finished
        for finished in memory_exporter.get_finished_spans()
        if finished.name == "create-error-generation"
    ]
    assert len(matching) == 1, "Expected one span"
    otel_status = matching[0].status

    # The OTEL status must carry both the error code and the message.
    assert otel_status.status_code == StatusCode.ERROR
    assert otel_status.description == "Generation failed during creation"

    # The Langfuse-specific attributes must be present as well.
    exported_attrs = self.get_spans_by_name(
        memory_exporter, "create-error-generation"
    )[0]["attributes"]
    assert exported_attrs[LangfuseOtelSpanAttributes.OBSERVATION_LEVEL] == "ERROR"
    assert (
        exported_attrs[LangfuseOtelSpanAttributes.OBSERVATION_STATUS_MESSAGE]
        == "Generation failed during creation"
    )
1036+
1037+
def test_generation_error_level_in_update(self, langfuse_client, memory_exporter):
    """Updating a generation to level='ERROR' must export it with OTEL status ERROR."""
    from opentelemetry.trace.status import StatusCode

    # Begin with an INFO generation, then switch it to ERROR before ending it.
    observed = langfuse_client.start_generation(
        name="update-error-generation",
        model="gpt-4",
        level="INFO",
    )
    observed.update(level="ERROR", status_message="Generation failed during execution")
    observed.end()

    # Inspect the raw exported OTEL span.
    matching = [
        finished
        for finished in memory_exporter.get_finished_spans()
        if finished.name == "update-error-generation"
    ]
    assert len(matching) == 1, "Expected one span"
    otel_status = matching[0].status

    # The OTEL status must reflect the update.
    assert otel_status.status_code == StatusCode.ERROR
    assert otel_status.description == "Generation failed during execution"

    # The Langfuse-specific attributes must be present as well.
    exported_attrs = self.get_spans_by_name(
        memory_exporter, "update-error-generation"
    )[0]["attributes"]
    assert exported_attrs[LangfuseOtelSpanAttributes.OBSERVATION_LEVEL] == "ERROR"
    assert (
        exported_attrs[LangfuseOtelSpanAttributes.OBSERVATION_STATUS_MESSAGE]
        == "Generation failed during execution"
    )
1069+
1070+
def test_non_error_levels_dont_set_otel_status(self, langfuse_client, memory_exporter):
    """Levels other than ERROR (and no level at all) must not yield OTEL status ERROR."""
    from opentelemetry.trace.status import StatusCode

    for i, level in enumerate(("INFO", "WARNING", "DEBUG", None)):
        span_name = f"non-error-span-{i}"
        observed = langfuse_client.start_span(name=span_name, level=level)

        # Exercise the update path too, but only when a level is actually given.
        if level is not None:
            observed.update(level=level, status_message="Not an error")

        observed.end()

        # Inspect the raw exported OTEL span for this iteration.
        matching = [
            finished
            for finished in memory_exporter.get_finished_spans()
            if finished.name == span_name
        ]
        assert len(matching) == 1, f"Expected one span for {span_name}"

        # Only the absence of ERROR is asserted here, not a specific other code.
        exported_code = matching[0].status.status_code
        assert exported_code != StatusCode.ERROR, f"Level {level} should not set ERROR status"
1097+
1098+
def test_multiple_error_updates(self, langfuse_client, memory_exporter):
    """Consecutive ERROR updates must leave the latest message on the OTEL status."""
    from opentelemetry.trace.status import StatusCode

    observed = langfuse_client.start_span(name="multi-error-span")

    # Two ERROR updates in a row; the second message is expected to win.
    observed.update(level="ERROR", status_message="First error")
    observed.update(level="ERROR", status_message="Second error")
    observed.end()

    # Inspect the raw exported OTEL span.
    matching = [
        finished
        for finished in memory_exporter.get_finished_spans()
        if finished.name == "multi-error-span"
    ]
    assert len(matching) == 1, "Expected one span"
    otel_status = matching[0].status

    # The exported status must show the most recent error message.
    assert otel_status.status_code == StatusCode.ERROR
    assert otel_status.description == "Second error"
1123+
1124+
def test_error_without_status_message(self, langfuse_client, memory_exporter):
    """level='ERROR' without a status_message must still yield OTEL status ERROR."""
    from opentelemetry.trace.status import StatusCode

    # ERROR level, deliberately without any status message.
    langfuse_client.start_span(name="error-no-message-span", level="ERROR").end()

    # Inspect the raw exported OTEL span.
    matching = [
        finished
        for finished in memory_exporter.get_finished_spans()
        if finished.name == "error-no-message-span"
    ]
    assert len(matching) == 1, "Expected one span"
    otel_status = matching[0].status

    # The error code is set, and the description stays None because no message was given.
    assert otel_status.status_code == StatusCode.ERROR
    assert otel_status.description is None
1143+
1144+
def test_different_observation_types_error_handling(self, langfuse_client, memory_exporter):
    """Every listed observation type created with level='ERROR' must export OTEL status ERROR."""
    from opentelemetry.trace.status import StatusCode

    observation_types = (
        "agent",
        "tool",
        "chain",
        "retriever",
        "evaluator",
        "embedding",
        "guardrail",
    )

    # Create one ERROR-level child of each type under a shared parent span.
    with langfuse_client.start_as_current_span(name="error-test-parent") as parent:
        for obs_type in observation_types:
            parent.start_observation(
                name=f"error-{obs_type}",
                as_type=obs_type,
                level="ERROR",
                status_message=f"{obs_type} failed",
            ).end()

    # Each child must have been exported exactly once with the right status.
    finished_spans = memory_exporter.get_finished_spans()

    for obs_type in observation_types:
        matching = [
            finished for finished in finished_spans if finished.name == f"error-{obs_type}"
        ]
        assert len(matching) == 1, f"Expected one span for {obs_type}"

        otel_status = matching[0].status
        assert otel_status.status_code == StatusCode.ERROR, f"{obs_type} should have ERROR status"
        assert otel_status.description == f"{obs_type} failed", f"{obs_type} should have correct description"
1172+
9471173

9481174
class TestAdvancedSpans(TestOTelBase):
9491175
"""Tests for advanced span functionality including generations, timing, and usage metrics."""

0 commit comments

Comments
 (0)