2424
2525
2626# Apps
27-
28-
2927def your_llm_app (input : str ):
3028 @observe (type = "retriever" )
3129 def retriever (input : str ):
@@ -349,7 +347,7 @@ def your_llm_app_with_tools_called(input: str):
349347 Docs: LLMTestCase supports tools_called and expected_tools fields.
350348 """
351349
352- @observe (type = "tool" , name = "search_tool " )
350+ @observe (type = "tool" , name = "observe_search_tool " )
353351 def search_tool (query : str ):
354352 return "search result"
355353
@@ -755,12 +753,6 @@ def test_evals_iterator_emits_span_with_matching_input_per_golden():
755753 assert gen .get ("input" ) == golden .input
756754
757755
758- ###############################
759- # Additional coverage tests #
760- ###############################
761-
762-
763- # Consider deleting this test
764756def test_update_current_trace_sets_trace_level_test_case ():
765757 """
766758 Docs: update_current_trace can be used to set end-to-end test cases for the trace.
@@ -785,7 +777,6 @@ def test_update_current_trace_sets_trace_level_test_case():
785777 )
786778
787779
788- # consider deleting this test: not asserting anything of value
789780def test_update_current_span_with_individual_params ():
790781 """
791782 Docs: update_current_span can take individual LLMTestCase params
@@ -808,7 +799,6 @@ def test_update_current_span_with_individual_params():
808799 assert gen .get ("output" ) == "MOCK_RESPONSE"
809800
810801
811- # consider deleting this test: Low value
812802def test_observe_name_parameter_customizes_span_name ():
813803 """
814804 Docs: The @observe decorator accepts a `name` parameter to customize
@@ -894,15 +884,49 @@ def test_llm_test_case_with_tools_called():
894884 assert gen is not None
895885
896886 # Also verify tool span was created
897- tool_span = find_span_by_name (trace_dict , "search_tool " )
887+ tool_span = find_span_by_name (trace_dict , "observe_search_tool " )
898888 tool_names = span_names_by_key (trace_dict , "toolSpans" )
899889 base_names = span_names_by_key (trace_dict , "baseSpans" )
900890
901891 assert (
902892 tool_span is not None
903- or "search_tool" in tool_names
904- or "search_tool" in base_names
905- ), f"Expected search_tool span. Got: { debug_span_names (trace_dict )} "
893+ or "observe_search_tool" in tool_names
894+ or "observe_search_tool" in base_names
895+ ), f"Expected observe_search_tool span. Got: { debug_span_names (trace_dict )} "
896+
897+ assert find_span_by_name (trace_dict , "search_tool" ) is None
898+
899+
def test_update_current_span_name_overrides_observer_name():
    """
    A name set with update_current_span(name=...) from inside the traced
    function should replace the name passed to @observe(name=...).

    Verifies that the latest trace contains a span named "update_name" and
    no span named "observer_name".
    """

    @observe(type="tool", name="observer_name")
    def tool_fn(x: str):
        # Rename the active span from inside the decorated call.
        update_current_span(name="update_name")
        return "ok"

    assert tool_fn("x") == "ok"

    latest_trace = get_latest_trace_dict()

    renamed_span = find_span_by_name(latest_trace, "update_name")
    assert renamed_span is not None, (
        "Expected update_current_span(name=...) to override @observe(name=...). "
        f"Got: {debug_span_names(latest_trace)}"
    )

    # The decorator-supplied name must not survive alongside the new one.
    stale_span = find_span_by_name(latest_trace, "observer_name")
    assert stale_span is None
916+
917+
def test_update_current_span_output_not_overridden_by_observer_kwargs():
    """
    An output set with update_current_span(output=...) inside the traced
    function should be the one recorded on the span, even when @observe was
    also given an `output` keyword argument.

    Verifies that the "tool_span" span in the latest trace reports
    output == "SHOULD_WIN".
    """

    @observe(name="tool_span", type="tool", output="SHOULD_NOT_WIN")
    def tool_fn(x: str):
        update_current_span(output="SHOULD_WIN")
        return "ok"

    tool_fn("x")

    latest_trace = get_latest_trace_dict()
    recorded_span = find_span_by_name(latest_trace, "tool_span")

    assert recorded_span is not None
    assert recorded_span.get("output") == "SHOULD_WIN"
906930
907931
908932def test_golden_with_expected_output ():
@@ -1068,3 +1092,163 @@ def agent_func(input: str):
10681092 "test_agent" in agent_names
10691093 or find_span_by_name (trace_dict , "test_agent" ) is not None
10701094 ), f"Expected test_agent span. Got: { debug_span_names (trace_dict )} "
1095+
1096+
1097+ ###############################################################
1098+ # Checklist: Nested execution contexts produce parent/child #
1099+ # span relationships with explicit UUID edges #
1100+ ###############################################################
1101+
1102+
def test_nested_spans_parent_child_uuid_relationships():
    """
    Checklist item 1: Nested execution contexts correctly produce parent and
    child span relationships.

    This test verifies:
    - Every expected span exists and has a truthy `uuid`
    - `nested_app` is the root (its `parentUuid` is None or absent)
    - Each child is linked explicitly via child.parentUuid == parent.uuid

    Expected tree:
        nested_app (root)
        |-- outer_retriever
        |   `-- inner_retriever
        `-- generator
    """
    assert your_llm_app_nested_spans("How are you?") == "MOCK_RESPONSE"

    trace_dict = get_latest_trace_dict()
    assert trace_dict is not None

    spans = all_spans(trace_dict)

    # Index spans by name; setdefault keeps the first span seen for a name.
    first_by_name = {}
    for candidate in spans:
        first_by_name.setdefault(candidate.get("name"), candidate)

    expected_names = (
        "nested_app",
        "outer_retriever",
        "inner_retriever",
        "generator",
    )

    # All spans must exist
    for span_name in expected_names:
        assert (
            first_by_name.get(span_name) is not None
        ), f"Missing {span_name}. Available: {[s.get('name') for s in spans]}"

    # All spans must have a uuid
    for span_name in expected_names:
        assert first_by_name[span_name].get("uuid"), f"{span_name} must have uuid"

    # nested_app is root (parentUuid is None or missing)
    root_span = first_by_name["nested_app"]
    assert (
        root_span.get("parentUuid") is None
    ), f"nested_app should be root span with no parent, got parentUuid={root_span.get('parentUuid')}"

    # (child, parent) edges that must be linked by UUID, checked in order.
    expected_edges = (
        ("outer_retriever", "nested_app"),
        ("inner_retriever", "outer_retriever"),
        ("generator", "nested_app"),
    )
    for child_name, parent_name in expected_edges:
        actual_parent_uuid = first_by_name[child_name].get("parentUuid")
        expected_parent_uuid = first_by_name[parent_name].get("uuid")
        assert actual_parent_uuid == expected_parent_uuid, (
            f"{child_name}.parentUuid should equal {parent_name}.uuid. "
            f"Got parentUuid={actual_parent_uuid}, expected={expected_parent_uuid}"
        )
1173+
1174+
1175+ ###############################################################
1176+ # Checklist: Component-level outputs convert to serialized #
1177+ # structure (TraceApi) with expected keys #
1178+ ###############################################################
1179+
1180+
def test_trace_serialization_contains_expected_top_level_keys():
    """
    Checklist item 2: Component-level outputs can be converted into a test run
    or serialized structure with the expected keys.

    Checks the dict returned by get_latest_trace_dict() for:
    - Top-level keys: uuid, startTime, endTime, status
    - Typed span bucket keys: baseSpans, agentSpans, llmSpans,
      retrieverSpans, toolSpans, each holding a list
    - Required keys on every span in those buckets: uuid, name, status,
      startTime, endTime
    """
    assert your_llm_app_rooted("How are you?") == "MOCK_RESPONSE"

    trace_dict = get_latest_trace_dict()
    assert trace_dict is not None

    # Top-level trace keys
    for top_key in ("uuid", "startTime", "endTime", "status"):
        assert top_key in trace_dict, f"Trace must have '{top_key}' key"

    # Typed span bucket keys must exist as lists
    span_buckets = (
        "baseSpans",
        "agentSpans",
        "llmSpans",
        "retrieverSpans",
        "toolSpans",
    )
    for bucket_name in span_buckets:
        assert bucket_name in trace_dict, f"Trace must have '{bucket_name}' key"
        assert isinstance(
            trace_dict[bucket_name], list
        ), f"'{bucket_name}' must be a list"

    # Per-span keys are checked only after every bucket has been validated,
    # so a malformed bucket is reported before any span-level problem.
    mandatory_span_keys = {"uuid", "name", "status", "startTime", "endTime"}
    for bucket_name in span_buckets:
        for entry in trace_dict[bucket_name]:
            absent = mandatory_span_keys.difference(entry.keys())
            assert (
                not absent
            ), f"Span '{entry.get('name')}' in {bucket_name} missing keys: {absent}"
1226+
1227+
1228+ ###############################################################
1229+ # Regression: ToolSpan kwargs collision fix #
1230+ ###############################################################
1231+
1232+
def test_observe_tool_with_name_kwarg_does_not_crash():
    """
    Regression test: @observe(type="tool", name="...") previously crashed due to
    a kwargs collision when 'name' was passed both in observe_kwargs and
    span_kwargs.

    Verifies that the decorated function still returns its own value and
    that the custom name shows up among the trace's toolSpans.
    """

    @observe(type="tool", name="my_named_tool")
    def named_tool_func(arg: str) -> str:
        return f"tool output: {arg}"

    # Calling the decorated function must not raise and must pass the
    # wrapped function's return value through unchanged.
    assert named_tool_func("test_input") == "tool output: test_input"

    trace_dict = get_latest_trace_dict()
    assert trace_dict is not None

    # The tool span should be in toolSpans with the custom name
    recorded_tool_names = span_names_by_key(trace_dict, "toolSpans")
    assert (
        "my_named_tool" in recorded_tool_names
    ), f"Expected 'my_named_tool' in toolSpans. Got: {recorded_tool_names}"
0 commit comments