@@ -944,6 +944,232 @@ def test_error_handling(self, langfuse_client, memory_exporter):
944944 == "Test error message"
945945 )
946946
def test_error_level_in_span_creation(self, langfuse_client, memory_exporter):
    """Check that a span started with level="ERROR" ends up with an ERROR OTEL status.

    A span is created with an error level and a status message, ended, and
    then inspected twice: through the raw finished OTEL span (status code and
    description) and through the Langfuse attributes returned by
    ``self.get_spans_by_name``.
    """
    from opentelemetry.trace.status import StatusCode

    span_name = "create-error-span"
    message = "Initial error state"

    # The error level is supplied up front, at creation time.
    error_span = langfuse_client.start_span(
        name=span_name,
        level="ERROR",
        status_message=message,
    )
    error_span.end()

    # Look at the raw OTEL span reported by the exporter.
    finished = memory_exporter.get_finished_spans()
    matching = [candidate for candidate in finished if candidate.name == span_name]
    assert len(matching) == 1, "Expected one span"
    otel_span = matching[0]

    # Status code and description must mirror the level and message.
    assert otel_span.status.status_code == StatusCode.ERROR
    assert otel_span.status.description == message

    # The Langfuse attributes must describe the same error state.
    attrs = self.get_spans_by_name(memory_exporter, span_name)[0]["attributes"]
    assert attrs[LangfuseOtelSpanAttributes.OBSERVATION_LEVEL] == "ERROR"
    assert attrs[LangfuseOtelSpanAttributes.OBSERVATION_STATUS_MESSAGE] == message
976+
def test_error_level_in_span_update(self, langfuse_client, memory_exporter):
    """Check that updating a span to level="ERROR" sets the OTEL status to ERROR.

    The span starts at level "INFO", is updated to "ERROR" with a status
    message, and is then ended. Both the raw OTEL status and the Langfuse
    attributes are expected to reflect the updated error state.
    """
    from opentelemetry.trace.status import StatusCode

    span_name = "update-error-span"
    message = "Updated to error state"

    # Start out as an ordinary span, then switch it to an error.
    tracked_span = langfuse_client.start_span(name=span_name, level="INFO")
    tracked_span.update(level="ERROR", status_message=message)
    tracked_span.end()

    # Look at the raw OTEL span reported by the exporter.
    finished = memory_exporter.get_finished_spans()
    matching = [candidate for candidate in finished if candidate.name == span_name]
    assert len(matching) == 1, "Expected one span"
    otel_span = matching[0]

    # Status code and description must come from the update call.
    assert otel_span.status.status_code == StatusCode.ERROR
    assert otel_span.status.description == message

    # The Langfuse attributes must describe the same error state.
    attrs = self.get_spans_by_name(memory_exporter, span_name)[0]["attributes"]
    assert attrs[LangfuseOtelSpanAttributes.OBSERVATION_LEVEL] == "ERROR"
    assert attrs[LangfuseOtelSpanAttributes.OBSERVATION_STATUS_MESSAGE] == message
1005+
def test_generation_error_level_in_creation(self, langfuse_client, memory_exporter):
    """Check that a generation started with level="ERROR" gets an ERROR OTEL status.

    A generation (model "gpt-4") is created with an error level and a status
    message, ended, and then verified through the raw OTEL span status and
    through the Langfuse attributes.
    """
    from opentelemetry.trace.status import StatusCode

    generation_name = "create-error-generation"
    failure_text = "Generation failed during creation"

    # The error level is supplied up front, at creation time.
    failed_generation = langfuse_client.start_generation(
        name=generation_name,
        model="gpt-4",
        level="ERROR",
        status_message=failure_text,
    )
    failed_generation.end()

    # Look at the raw OTEL span reported by the exporter.
    finished = memory_exporter.get_finished_spans()
    matching = [candidate for candidate in finished if candidate.name == generation_name]
    assert len(matching) == 1, "Expected one span"
    otel_span = matching[0]

    # Status code and description must mirror the level and message.
    assert otel_span.status.status_code == StatusCode.ERROR
    assert otel_span.status.description == failure_text

    # The Langfuse attributes must describe the same error state.
    attrs = self.get_spans_by_name(memory_exporter, generation_name)[0]["attributes"]
    assert attrs[LangfuseOtelSpanAttributes.OBSERVATION_LEVEL] == "ERROR"
    assert attrs[LangfuseOtelSpanAttributes.OBSERVATION_STATUS_MESSAGE] == failure_text
1036+
def test_generation_error_level_in_update(self, langfuse_client, memory_exporter):
    """Check that updating a generation to level="ERROR" sets the OTEL status to ERROR.

    The generation (model "gpt-4") starts at level "INFO", is updated to
    "ERROR" with a status message, and is then ended. Both the raw OTEL status
    and the Langfuse attributes are expected to reflect the update.
    """
    from opentelemetry.trace.status import StatusCode

    generation_name = "update-error-generation"
    failure_text = "Generation failed during execution"

    # Start out as an ordinary generation, then switch it to an error.
    tracked_generation = langfuse_client.start_generation(
        name=generation_name,
        model="gpt-4",
        level="INFO",
    )
    tracked_generation.update(level="ERROR", status_message=failure_text)
    tracked_generation.end()

    # Look at the raw OTEL span reported by the exporter.
    finished = memory_exporter.get_finished_spans()
    matching = [candidate for candidate in finished if candidate.name == generation_name]
    assert len(matching) == 1, "Expected one span"
    otel_span = matching[0]

    # Status code and description must come from the update call.
    assert otel_span.status.status_code == StatusCode.ERROR
    assert otel_span.status.description == failure_text

    # The Langfuse attributes must describe the same error state.
    attrs = self.get_spans_by_name(memory_exporter, generation_name)[0]["attributes"]
    assert attrs[LangfuseOtelSpanAttributes.OBSERVATION_LEVEL] == "ERROR"
    assert attrs[LangfuseOtelSpanAttributes.OBSERVATION_STATUS_MESSAGE] == failure_text
1069+
def test_non_error_levels_dont_set_otel_status(self, langfuse_client, memory_exporter):
    """Check that non-ERROR levels never produce an ERROR OTEL span status.

    For each of "INFO", "WARNING", "DEBUG" and ``None`` a span is started with
    that level and ended, and its raw OTEL status code is asserted to differ
    from ``StatusCode.ERROR``. Spans with a non-``None`` level are also updated
    with the same level so that the update path is exercised.
    """
    # Imported once up front instead of on every loop iteration.
    from opentelemetry.trace.status import StatusCode

    test_levels = ["INFO", "WARNING", "DEBUG", None]

    for i, level in enumerate(test_levels):
        # One distinct name per level; reused below to find the finished span.
        span_name = f"non-error-span-{i}"
        span = langfuse_client.start_span(name=span_name, level=level)

        # Update with the same level to cover the update path too.
        # The None case is only exercised through creation.
        if level is not None:
            span.update(level=level, status_message="Not an error")

        span.end()

        # Re-read the exporter each iteration: the span only appears once ended.
        raw_spans = [
            s for s in memory_exporter.get_finished_spans()
            if s.name == span_name
        ]
        assert len(raw_spans) == 1, f"Expected one span for {span_name}"
        raw_span = raw_spans[0]

        # Only "not ERROR" is asserted; the exact non-error code is left open.
        assert raw_span.status.status_code != StatusCode.ERROR, (
            f"Level {level} should not set ERROR status"
        )
1097+
def test_multiple_error_updates(self, langfuse_client, memory_exporter):
    """Check that repeated ERROR updates leave the last status message in place.

    A span is updated to level "ERROR" twice with different messages and then
    ended. The raw OTEL status is expected to be ERROR with the description
    from the second update.
    """
    from opentelemetry.trace.status import StatusCode

    span_name = "multi-error-span"
    error_messages = ("First error", "Second error")

    tracked_span = langfuse_client.start_span(name=span_name)

    # Apply the error updates in order; the later one should win.
    for text in error_messages:
        tracked_span.update(level="ERROR", status_message=text)

    tracked_span.end()

    # Look at the raw OTEL span reported by the exporter.
    finished = memory_exporter.get_finished_spans()
    matching = [candidate for candidate in finished if candidate.name == span_name]
    assert len(matching) == 1, "Expected one span"
    otel_span = matching[0]

    # The description must be the message from the final update.
    assert otel_span.status.status_code == StatusCode.ERROR
    assert otel_span.status.description == error_messages[-1]
1123+
def test_error_without_status_message(self, langfuse_client, memory_exporter):
    """Check that level="ERROR" sets the OTEL status even with no status_message.

    The span is created with an error level only. The raw OTEL status code is
    expected to be ERROR and its description is expected to be ``None``.
    """
    from opentelemetry.trace.status import StatusCode

    span_name = "error-no-message-span"

    # No status_message is passed on purpose.
    bare_error_span = langfuse_client.start_span(name=span_name, level="ERROR")
    bare_error_span.end()

    # Look at the raw OTEL span reported by the exporter.
    finished = memory_exporter.get_finished_spans()
    matching = [candidate for candidate in finished if candidate.name == span_name]
    assert len(matching) == 1, "Expected one span"
    otel_span = matching[0]

    # The code is ERROR; with no message given, the description stays None.
    assert otel_span.status.status_code == StatusCode.ERROR
    assert otel_span.status.description is None
1143+
def test_different_observation_types_error_handling(self, langfuse_client, memory_exporter):
    """Check that level="ERROR" sets the OTEL status for several observation types.

    Under one parent span, a child observation is started for each of the
    listed ``as_type`` values with an error level and a type-specific status
    message, then ended. Each resulting raw OTEL span is expected to carry an
    ERROR status code and the matching description.
    """
    # Imported once up front instead of on every verification iteration.
    from opentelemetry.trace.status import StatusCode

    observation_types = [
        "agent",
        "tool",
        "chain",
        "retriever",
        "evaluator",
        "embedding",
        "guardrail",
    ]
    # Build each span name once so creation and lookup cannot drift apart.
    span_names = {obs_type: f"error-{obs_type}" for obs_type in observation_types}

    # Create one erroring child observation per type under a shared parent.
    with langfuse_client.start_as_current_span(name="error-test-parent") as parent:
        for obs_type in observation_types:
            obs = parent.start_observation(
                name=span_names[obs_type],
                as_type=obs_type,
                level="ERROR",
                status_message=f"{obs_type} failed",
            )
            obs.end()

    # Every child was ended above, so one exporter snapshot covers them all.
    raw_spans = memory_exporter.get_finished_spans()

    for obs_type in observation_types:
        obs_spans = [s for s in raw_spans if s.name == span_names[obs_type]]
        assert len(obs_spans) == 1, f"Expected one span for {obs_type}"

        raw_span = obs_spans[0]
        assert raw_span.status.status_code == StatusCode.ERROR, (
            f"{obs_type} should have ERROR status"
        )
        assert raw_span.status.description == f"{obs_type} failed", (
            f"{obs_type} should have correct description"
        )
1172+
9471173
9481174class TestAdvancedSpans (TestOTelBase ):
9491175 """Tests for advanced span functionality including generations, timing, and usage metrics."""
0 commit comments