diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 457ead64212..fa6c7e5b838 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -465,7 +465,7 @@ def _do_annotations(self, span: Span) -> None:
         with self._annotation_context_lock:
             for _, context_id, annotation_kwargs in self._instance._annotations:
                 if current_context_id == context_id:
-                    self.annotate(span, **annotation_kwargs)
+                    self.annotate(span, **annotation_kwargs, _suppress_span_kind_error=True)
 
     def _child_after_fork(self) -> None:
         self._llmobs_span_writer = self._llmobs_span_writer.recreate()
@@ -505,7 +505,7 @@ def _stop_service(self) -> None:
         core.reset_listeners("trace.span_start", self._on_span_start)
         core.reset_listeners("trace.span_finish", self._on_span_finish)
         core.reset_listeners("http.span_inject", self._inject_llmobs_context)
-        core.reset_listeners("http.activate_distributed_headers", self._activate_llmobs_distributed_context)
+        core.reset_listeners("http.activate_distributed_headers", self._activate_llmobs_distributed_context_soft_fail)
         core.reset_listeners("threading.submit", self._current_trace_context)
         core.reset_listeners("threading.execution", self._llmobs_context_provider.activate)
         core.reset_listeners("asyncio.create_task", self._on_asyncio_create_task)
@@ -620,7 +620,7 @@ def enable(
         core.on("trace.span_start", cls._instance._on_span_start)
         core.on("trace.span_finish", cls._instance._on_span_finish)
         core.on("http.span_inject", cls._inject_llmobs_context)
-        core.on("http.activate_distributed_headers", cls._activate_llmobs_distributed_context)
+        core.on("http.activate_distributed_headers", cls._activate_llmobs_distributed_context_soft_fail)
         core.on("threading.submit", cls._instance._current_trace_context, "llmobs_ctx")
         core.on("threading.execution", cls._instance._llmobs_context_provider.activate)
         core.on("asyncio.create_task", cls._instance._on_asyncio_create_task)
@@ -1014,16 +1014,14 @@ def export_span(cls, span: Optional[Span] = None) -> Optional[ExportedLLMObsSpan
         try:
             if span.span_type != SpanTypes.LLM:
                 error = "invalid_span"
-                log.warning("Span must be an LLMObs-generated span.")
-                return None
+                raise Exception("Span must be an LLMObs-generated span.")
             return ExportedLLMObsSpan(
                 span_id=str(span.span_id),
                 trace_id=format_trace_id(span._get_ctx_item(LLMOBS_TRACE_ID) or span.trace_id),
             )
         except (TypeError, AttributeError):
             error = "invalid_span"
-            log.warning("Failed to export span. Span must be a valid Span object.")
-            return None
+            raise Exception("Failed to export span. Span must be a valid Span object.") from None
         finally:
             telemetry.record_span_exported(span, error)
 
@@ -1338,6 +1336,7 @@ def annotate(
         tags: Optional[Dict[str, Any]] = None,
         tool_definitions: Optional[List[Dict[str, Any]]] = None,
         _name: Optional[str] = None,
+        _suppress_span_kind_error: bool = False,
     ) -> None:
         """
         Sets metadata, inputs, outputs, tags, and metrics as provided for a given LLMObs span.
@@ -1397,32 +1396,29 @@ def annotate(
                 span = cls._instance._current_span()
             if span is None:
                 error = "invalid_span_no_active_spans"
-                log.warning("No span provided and no active LLMObs-generated span found.")
-                return
+                raise Exception("No span provided and no active LLMObs-generated span found.")
             if span.span_type != SpanTypes.LLM:
                 error = "invalid_span_type"
-                log.warning("Span must be an LLMObs-generated span.")
-                return
+                raise Exception("Span must be an LLMObs-generated span.")
             if span.finished:
                 error = "invalid_finished_span"
-                log.warning("Cannot annotate a finished span.")
-                return
+                raise Exception("Cannot annotate a finished span.")
             if metadata is not None:
                 if not isinstance(metadata, dict):
                     error = "invalid_metadata"
-                    log.warning("metadata must be a dictionary")
+                    raise Exception("metadata must be a dictionary")
                 else:
                     cls._set_dict_attribute(span, METADATA, metadata)
             if metrics is not None:
                 if not isinstance(metrics, dict) or not all(isinstance(v, (int, float)) for v in metrics.values()):
                     error = "invalid_metrics"
-                    log.warning("metrics must be a dictionary of string key - numeric value pairs.")
+                    raise Exception("metrics must be a dictionary of string key - numeric value pairs.")
                 else:
                     cls._set_dict_attribute(span, METRICS, metrics)
             if tags is not None:
                 if not isinstance(tags, dict):
                     error = "invalid_tags"
-                    log.warning("span tags must be a dictionary of string key - primitive value pairs.")
+                    raise Exception("span tags must be a dictionary of string key - primitive value pairs.")
                 else:
                     session_id = tags.get("session_id")
                     if session_id:
@@ -1441,10 +1437,11 @@ def annotate(
                         cls._set_dict_attribute(span, INPUT_PROMPT, validated_prompt)
                 except (ValueError, TypeError) as e:
                     error = "invalid_prompt"
-                    log.warning("Failed to validate prompt with error:", str(e), exc_info=True)
-            if not span_kind:
-                log.debug("Span kind not specified, skipping annotation for input/output data")
-                return
+                    raise Exception("Failed to validate prompt with error:", str(e))
+            if (
+                not span_kind and not _suppress_span_kind_error
+            ):  # TODO(sabrenner): we should figure out how to remove this check for annotation contexts
+                raise Exception("Span kind not specified, skipping annotation for input/output data")
             if input_data is not None or output_data is not None:
                 if span_kind == "llm":
                     error = cls._tag_llm_io(span, input_messages=input_data, output_messages=output_data)
@@ -1471,7 +1468,9 @@ def _tag_llm_io(cls, span, input_messages=None, output_messages=None) -> Optiona
             if input_messages.messages:
                 span._set_ctx_item(INPUT_MESSAGES, input_messages.messages)
         except TypeError:
-            log.warning("Failed to parse input messages.", exc_info=True)
+            log.warning(
+                "Failed to parse input messages.", exc_info=True
+            )  # TODO: figure out how to raise this error and return the error type
             return "invalid_io_messages"
         if output_messages is None:
             return None
@@ -1482,7 +1481,9 @@ def _tag_llm_io(cls, span, input_messages=None, output_messages=None) -> Optiona
                 return None
             span._set_ctx_item(OUTPUT_MESSAGES, output_messages.messages)
         except TypeError:
-            log.warning("Failed to parse output messages.", exc_info=True)
+            log.warning(
+                "Failed to parse output messages.", exc_info=True
+            )  # TODO: figure out how to raise this error and return the error type
             return "invalid_io_messages"
         return None
 
@@ -1498,7 +1499,9 @@ def _tag_embedding_io(cls, span, input_documents=None, output_text=None) -> Opti
             if input_documents.documents:
                 span._set_ctx_item(INPUT_DOCUMENTS, input_documents.documents)
         except TypeError:
-            log.warning("Failed to parse input documents.", exc_info=True)
+            log.warning(
+                "Failed to parse input documents.", exc_info=True
+            )  # TODO: figure out how to raise this error and return the error type
             return "invalid_embedding_io"
         if output_text is None:
             return None
@@ -1521,7 +1524,9 @@ def _tag_retrieval_io(cls, span, input_text=None, output_documents=None) -> Opti
                 return None
             span._set_ctx_item(OUTPUT_DOCUMENTS, output_documents.documents)
         except TypeError:
-            log.warning("Failed to parse output documents.", exc_info=True)
+            log.warning(
+                "Failed to parse output documents.", exc_info=True
+            )  # TODO: figure out how to raise this error and return the error type
             return "invalid_retrieval_io"
         return None
 
@@ -1712,17 +1717,15 @@ def submit_evaluation(
                 raise TypeError("value must be a boolean for a boolean metric.")
 
         if tags is not None and not isinstance(tags, dict):
-            log.warning("tags must be a dictionary of string key-value pairs.")
-            tags = {}
+            raise Exception("tags must be a dictionary of string key-value pairs.")
 
         ml_app = ml_app if ml_app else config._llmobs_ml_app
         if not ml_app:
            error = "missing_ml_app"
-            log.warning(
+            raise Exception(
                 "ML App name is required for sending evaluation metrics. Evaluation metric data will not be sent. "
                 "Ensure this configuration is set before running your application."
             )
-            return
 
         evaluation_tags = {
             "ddtrace.version": ddtrace.__version__,
@@ -1735,7 +1738,7 @@ def submit_evaluation(
                 evaluation_tags[ensure_text(k)] = ensure_text(v)
             except TypeError:
                 error = "invalid_tags"
-                log.warning("Failed to parse tags. Tags for evaluation metrics must be strings.")
+                raise Exception("Failed to parse tags. Tags for evaluation metrics must be strings.")
 
         evaluation_metric: LLMObsEvaluationMetricEvent = {
             "join_on": join_on,
@@ -1750,20 +1753,20 @@ def submit_evaluation(
 
         if assessment:
             if not isinstance(assessment, str) or assessment not in ("pass", "fail"):
                 error = "invalid_assessment"
-                log.warning("Failed to parse assessment. assessment must be either 'pass' or 'fail'.")
+                raise Exception("Failed to parse assessment. assessment must be either 'pass' or 'fail'.")
             else:
                 evaluation_metric["assessment"] = assessment
         if reasoning:
             if not isinstance(reasoning, str):
                 error = "invalid_reasoning"
-                log.warning("Failed to parse reasoning. reasoning must be a string.")
+                raise Exception("Failed to parse reasoning. reasoning must be a string.")
             else:
                 evaluation_metric["reasoning"] = reasoning
         if metadata:
             if not isinstance(metadata, dict):
                 error = "invalid_metadata"
-                log.warning("metadata must be json serializable dictionary.")
+                raise Exception("metadata must be json serializable dictionary.")
             else:
                 metadata = safe_json(metadata)
                 if metadata and isinstance(metadata, str):
@@ -1801,7 +1804,9 @@ def _inject_llmobs_context(cls, span_context: Context, request_headers: Dict[str
         span_context._meta[PROPAGATED_ML_APP_KEY] = ml_app
 
     @classmethod
-    def inject_distributed_headers(cls, request_headers: Dict[str, str], span: Optional[Span] = None) -> Dict[str, str]:
+    def inject_distributed_headers(
+        cls, request_headers: Dict[str, str], span: Optional[Span] = None, _soft_fail: bool = False
+    ) -> Dict[str, str]:
         """Injects the span's distributed context into the given request headers."""
         if cls.enabled is False:
             log.warning(
@@ -1813,53 +1818,74 @@ def inject_distributed_headers(cls, request_headers: Dict[str, str], span: Optio
         try:
             if not isinstance(request_headers, dict):
                 error = "invalid_request_headers"
-                log.warning("request_headers must be a dictionary of string key-value pairs.")
-                return request_headers
+                if _soft_fail:
+                    log.warning("request_headers must be a dictionary of string key-value pairs.")
+                    return request_headers
+                else:
+                    raise Exception("request_headers must be a dictionary of string key-value pairs.")
             if span is None:
                 span = cls._instance.tracer.current_span()
             if span is None:
                 error = "no_active_span"
-                log.warning("No span provided and no currently active span found.")
-                return request_headers
+                if _soft_fail:
+                    log.warning("No span provided and no currently active span found.")
+                    return request_headers
+                raise Exception("No span provided and no currently active span found.")
             if not isinstance(span, Span):
                 error = "invalid_span"
-                log.warning("span must be a valid Span object. Distributed context will not be injected.")
-                return request_headers
+                if _soft_fail:
+                    log.warning("span must be a valid Span object. Distributed context will not be injected.")
+                    return request_headers
+                raise Exception("span must be a valid Span object. Distributed context will not be injected.")
             HTTPPropagator.inject(span.context, request_headers)
             return request_headers
         finally:
             telemetry.record_inject_distributed_headers(error)
 
     @classmethod
-    def _activate_llmobs_distributed_context(cls, request_headers: Dict[str, str], context: Context) -> Optional[str]:
-        if cls.enabled is False:
-            return None
-        if not context.trace_id or not context.span_id:
-            log.warning("Failed to extract trace/span ID from request headers.")
-            return "missing_context"
-        _parent_id = context._meta.get(PROPAGATED_PARENT_ID_KEY)
-        if _parent_id is None:
-            log.debug("Failed to extract LLMObs parent ID from request headers.")
-            return "missing_parent_id"
+    def _activate_llmobs_distributed_context_soft_fail(cls, request_headers: Dict[str, str], context: Context) -> None:
+        cls._activate_llmobs_distributed_context(request_headers, context, _soft_fail=True)
+
+    @classmethod
+    def _activate_llmobs_distributed_context(
+        cls, request_headers: Dict[str, str], context: Context, _soft_fail: bool = False
+    ) -> None:
+        error = None
         try:
-            parent_id = int(_parent_id)
-        except ValueError:
-            log.warning("Failed to parse LLMObs parent ID from request headers.")
-            return "invalid_parent_id"
-        parent_llmobs_trace_id = context._meta.get(PROPAGATED_LLMOBS_TRACE_ID_KEY)
-        if parent_llmobs_trace_id is None:
-            log.debug("Failed to extract LLMObs trace ID from request headers. Expected string, got None.")
+            if cls.enabled is False:
+                return
+            if not context.trace_id or not context.span_id:
+                error = "missing_context"
+                if _soft_fail:
+                    log.warning("Failed to extract trace/span ID from request headers.")
+                    return
+                raise Exception("Failed to extract trace/span ID from request headers.")
+            _parent_id = context._meta.get(PROPAGATED_PARENT_ID_KEY)
+            if _parent_id is None:
+                error = "missing_parent_id"
+                log.debug("Failed to extract LLMObs parent ID from request headers.")
+                return
+            try:
+                parent_id = int(_parent_id)
+            except ValueError:
+                error = "invalid_parent_id"
+                log.warning("Failed to parse LLMObs parent ID from request headers.")
+                return
+            parent_llmobs_trace_id = context._meta.get(PROPAGATED_LLMOBS_TRACE_ID_KEY)
+            if parent_llmobs_trace_id is None:
+                log.debug("Failed to extract LLMObs trace ID from request headers. Expected string, got None.")
+                llmobs_context = Context(trace_id=context.trace_id, span_id=parent_id)
+                llmobs_context._meta[PROPAGATED_LLMOBS_TRACE_ID_KEY] = str(context.trace_id)
+                cls._instance._llmobs_context_provider.activate(llmobs_context)
+                error = "missing_parent_llmobs_trace_id"
             llmobs_context = Context(trace_id=context.trace_id, span_id=parent_id)
-            llmobs_context._meta[PROPAGATED_LLMOBS_TRACE_ID_KEY] = str(context.trace_id)
+            llmobs_context._meta[PROPAGATED_LLMOBS_TRACE_ID_KEY] = str(parent_llmobs_trace_id)
             cls._instance._llmobs_context_provider.activate(llmobs_context)
-            return "missing_parent_llmobs_trace_id"
-        llmobs_context = Context(trace_id=context.trace_id, span_id=parent_id)
-        llmobs_context._meta[PROPAGATED_LLMOBS_TRACE_ID_KEY] = str(parent_llmobs_trace_id)
-        cls._instance._llmobs_context_provider.activate(llmobs_context)
-        return None
+        finally:
+            telemetry.record_activate_distributed_headers(error)
 
     @classmethod
-    def activate_distributed_headers(cls, request_headers: Dict[str, str]) -> None:
+    def activate_distributed_headers(cls, request_headers: Dict[str, str], _soft_fail: bool = False) -> None:
         """
         Activates distributed tracing headers for the current request.
@@ -1873,8 +1899,7 @@ def activate_distributed_headers(cls, request_headers: Dict[str, str]) -> None:
             return
         context = HTTPPropagator.extract(request_headers)
         cls._instance.tracer.context_provider.activate(context)
-        error = cls._instance._activate_llmobs_distributed_context(request_headers, context)
-        telemetry.record_activate_distributed_headers(error)
+        cls._instance._activate_llmobs_distributed_context(request_headers, context, _soft_fail=_soft_fail)
 
 
 # initialize the default llmobs instance
diff --git a/tests/llmobs/test_llmobs_service.py b/tests/llmobs/test_llmobs_service.py
index 5a0c233002b..8a60c2d4416 100644
--- a/tests/llmobs/test_llmobs_service.py
+++ b/tests/llmobs/test_llmobs_service.py
@@ -26,6 +26,7 @@
 from ddtrace.llmobs._constants import OUTPUT_MESSAGES
 from ddtrace.llmobs._constants import OUTPUT_VALUE
 from ddtrace.llmobs._constants import PROPAGATED_ML_APP_KEY
+from ddtrace.llmobs._constants import PROPAGATED_PARENT_ID_KEY
 from ddtrace.llmobs._constants import SESSION_ID
 from ddtrace.llmobs._constants import SPAN_KIND
 from ddtrace.llmobs._constants import SPAN_START_WHILE_DISABLED_WARNING
@@ -424,23 +425,26 @@ def test_embedding_span(llmobs, llmobs_events):
     )
 
 
-def test_annotate_no_active_span_logs_warning(llmobs, mock_llmobs_logs):
-    llmobs.annotate(metadata={"test": "test"})
-    mock_llmobs_logs.warning.assert_called_once_with("No span provided and no active LLMObs-generated span found.")
+def test_annotate_no_active_span_logs_warning(llmobs):
+    with pytest.raises(Exception) as excinfo:
+        llmobs.annotate(metadata={"test": "test"})
+    assert str(excinfo.value) == "No span provided and no active LLMObs-generated span found."
 
 
-def test_annotate_non_llm_span_logs_warning(llmobs, mock_llmobs_logs):
+def test_annotate_non_llm_span_logs_warning(llmobs):
     dummy_tracer = DummyTracer()
     with dummy_tracer.trace("root") as non_llmobs_span:
-        llmobs.annotate(span=non_llmobs_span, metadata={"test": "test"})
-        mock_llmobs_logs.warning.assert_called_once_with("Span must be an LLMObs-generated span.")
+        with pytest.raises(Exception) as excinfo:
+            llmobs.annotate(span=non_llmobs_span, metadata={"test": "test"})
+        assert str(excinfo.value) == "Span must be an LLMObs-generated span."
 
 
-def test_annotate_finished_span_does_nothing(llmobs, mock_llmobs_logs):
+def test_annotate_finished_span_does_nothing(llmobs):
     with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span:
         pass
-    llmobs.annotate(span=span, metadata={"test": "test"})
-    mock_llmobs_logs.warning.assert_called_once_with("Cannot annotate a finished span.")
+    with pytest.raises(Exception) as excinfo:
+        llmobs.annotate(span=span, metadata={"test": "test"})
+    assert str(excinfo.value) == "Cannot annotate a finished span."
 
 
 def test_annotate_metadata(llmobs):
@@ -462,12 +466,11 @@ def test_annotate_metadata_updates(llmobs):
     }
 
 
-def test_annotate_metadata_wrong_type_raises_warning(llmobs, mock_llmobs_logs):
+def test_annotate_metadata_wrong_type_raises_warning(llmobs):
     with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span:
-        llmobs.annotate(span=span, metadata="wrong_metadata")
-        assert span._get_ctx_item(METADATA) is None
-        mock_llmobs_logs.warning.assert_called_once_with("metadata must be a dictionary")
-        mock_llmobs_logs.reset_mock()
+        with pytest.raises(Exception) as excinfo:
+            llmobs.annotate(span=span, metadata="wrong_metadata")
+        assert str(excinfo.value) == "metadata must be a dictionary"
 
 
 def test_annotate_tag(llmobs):
@@ -483,13 +486,11 @@ def test_annotate_tag_can_set_session_id(llmobs):
     assert span._get_ctx_item(SESSION_ID) == "1234567890"
 
 
-def test_annotate_tag_wrong_type(llmobs, mock_llmobs_logs):
+def test_annotate_tag_wrong_type(llmobs):
     with llmobs.llm(model_name="test_model", name="test_llm_call", model_provider="test_provider") as span:
-        llmobs.annotate(span=span, tags=12345)
-        assert span._get_ctx_item(TAGS) is None
-        mock_llmobs_logs.warning.assert_called_once_with(
-            "span tags must be a dictionary of string key - primitive value pairs."
-        )
+        with pytest.raises(Exception) as excinfo:
+            llmobs.annotate(span=span, tags=12345)
+        assert str(excinfo.value) == "span tags must be a dictionary of string key - primitive value pairs."
 
 
 def test_annotate_input_string(llmobs):
@@ -758,14 +759,11 @@ def test_annotate_metrics_updates(llmobs):
     assert span._get_ctx_item(METRICS) == {"input_tokens": 20, "output_tokens": 20, "total_tokens": 40}
 
 
-def test_annotate_metrics_wrong_type(llmobs, mock_llmobs_logs):
+def test_annotate_metrics_wrong_type(llmobs):
     with llmobs.llm(model_name="test_model") as llm_span:
-        llmobs.annotate(span=llm_span, metrics=12345)
-        assert llm_span._get_ctx_item(METRICS) is None
-        mock_llmobs_logs.warning.assert_called_once_with(
-            "metrics must be a dictionary of string key - numeric value pairs."
-        )
-        mock_llmobs_logs.reset_mock()
+        with pytest.raises(Exception) as excinfo:
+            llmobs.annotate(span=llm_span, metrics=12345)
+        assert str(excinfo.value) == "metrics must be a dictionary of string key - numeric value pairs."
 
 
 def test_annotate_prompt_dict(llmobs):
@@ -835,20 +833,21 @@ def test_annotate_prompt_typed_dict(llmobs):
     }
 
 
-def test_annotate_prompt_wrong_type(llmobs, mock_llmobs_logs):
+def test_annotate_prompt_wrong_type(llmobs):
     with llmobs.llm(model_name="test_model") as span:
-        llmobs.annotate(span=span, prompt="prompt")
-        assert span._get_ctx_item(INPUT_PROMPT) is None
-        mock_llmobs_logs.warning.assert_called_once_with(
-            "Failed to validate prompt with error:", "Prompt must be a dictionary, received str.", exc_info=True
+        with pytest.raises(Exception) as excinfo:
+            llmobs.annotate(span=span, prompt="prompt")
+        assert excinfo.value.args == (
+            "Failed to validate prompt with error:",
+            "Prompt must be a dictionary, received str.",
         )
-        mock_llmobs_logs.reset_mock()
-        llmobs.annotate(span=span, prompt={"template": 1})
-        mock_llmobs_logs.warning.assert_called_once_with(
-            "Failed to validate prompt with error:", "template: 1 must be a string, received int", exc_info=True
+        with pytest.raises(Exception) as excinfo:
+            llmobs.annotate(span=span, prompt={"template": 1})
+        assert excinfo.value.args == (
+            "Failed to validate prompt with error:",
+            "template: 1 must be a string, received int",
         )
-        mock_llmobs_logs.reset_mock()
 
 
 def test_span_error_sets_error(llmobs, llmobs_events):
@@ -916,15 +915,17 @@ def test_ml_app_override(llmobs, llmobs_events):
     assert llmobs_events[6] == _expected_llmobs_non_llm_span_event(span, "retrieval", tags={"ml_app": "test_app"})
 
 
-def test_export_span_specified_span_is_incorrect_type_raises_warning(llmobs, mock_llmobs_logs):
-    llmobs.export_span(span="asd")
-    mock_llmobs_logs.warning.assert_called_once_with("Failed to export span. Span must be a valid Span object.")
+def test_export_span_specified_span_is_incorrect_type_raises_warning(llmobs):
+    with pytest.raises(Exception) as excinfo:
+        llmobs.export_span(span="asd")
+    assert str(excinfo.value) == "Failed to export span. Span must be a valid Span object."
 
 
-def test_export_span_specified_span_is_not_llmobs_span_raises_warning(llmobs, mock_llmobs_logs):
+def test_export_span_specified_span_is_not_llmobs_span_raises_warning(llmobs):
     with DummyTracer().trace("non_llmobs_span") as span:
-        llmobs.export_span(span=span)
-        mock_llmobs_logs.warning.assert_called_once_with("Span must be an LLMObs-generated span.")
+        with pytest.raises(Exception) as excinfo:
+            llmobs.export_span(span=span)
+        assert str(excinfo.value) == "Span must be an LLMObs-generated span."
 
 
 def test_export_span_specified_span_returns_span_context(llmobs):
@@ -980,24 +981,17 @@ def test_inject_distributed_headers_llmobs_disabled_does_nothing(llmobs, mock_ll
     assert headers == {}
 
 
-def test_inject_distributed_headers_not_dict_logs_warning(llmobs, mock_llmobs_logs):
-    headers = llmobs.inject_distributed_headers("not a dictionary", span=None)
-    mock_llmobs_logs.warning.assert_called_once_with("request_headers must be a dictionary of string key-value pairs.")
-    assert headers == "not a dictionary"
-    mock_llmobs_logs.reset_mock()
-    headers = llmobs.inject_distributed_headers(123, span=None)
-    mock_llmobs_logs.warning.assert_called_once_with("request_headers must be a dictionary of string key-value pairs.")
-    assert headers == 123
-    mock_llmobs_logs.reset_mock()
-    headers = llmobs.inject_distributed_headers(None, span=None)
-    mock_llmobs_logs.warning.assert_called_once_with("request_headers must be a dictionary of string key-value pairs.")
-    assert headers is None
+@pytest.mark.parametrize("request_headers", ["not a dictionary", 123, None])
+def test_inject_distributed_headers_not_dict_logs_warning(llmobs, request_headers):
+    with pytest.raises(Exception) as excinfo:
+        llmobs.inject_distributed_headers(request_headers, span=None)
+    assert str(excinfo.value) == "request_headers must be a dictionary of string key-value pairs."
 
 
-def test_inject_distributed_headers_no_active_span_logs_warning(llmobs, mock_llmobs_logs):
-    headers = llmobs.inject_distributed_headers({}, span=None)
-    mock_llmobs_logs.warning.assert_called_once_with("No span provided and no currently active span found.")
-    assert headers == {}
+def test_inject_distributed_headers_no_active_span_logs_warning(llmobs):
+    with pytest.raises(Exception) as excinfo:
+        llmobs.inject_distributed_headers({}, span=None)
+    assert str(excinfo.value) == "No span provided and no currently active span found."
 
 
 def test_inject_distributed_headers_span_calls_httppropagator_inject(llmobs, mock_llmobs_logs):
@@ -1032,36 +1026,29 @@ def test_activate_distributed_headers_calls_httppropagator_extract(llmobs, mock_
         mock_extract.assert_called_once_with({})
 
 
-def test_activate_distributed_headers_no_trace_id_does_nothing(llmobs, mock_llmobs_logs):
-    with mock.patch("ddtrace.llmobs._llmobs.HTTPPropagator.extract") as mock_extract:
-        mock_extract.return_value = Context(span_id=123)
+def test_activate_distributed_headers_no_trace_id_raises(llmobs):
+    with pytest.raises(Exception) as excinfo:
         llmobs.activate_distributed_headers({})
-        assert mock_extract.call_count == 1
-        mock_llmobs_logs.warning.assert_called_once_with("Failed to extract trace/span ID from request headers.")
+    assert str(excinfo.value) == "Failed to extract trace/span ID from request headers."
 
 
-def test_activate_distributed_headers_no_span_id_does_nothing(llmobs, mock_llmobs_logs):
-    with mock.patch("ddtrace.llmobs._llmobs.HTTPPropagator.extract") as mock_extract:
-        mock_extract.return_value = Context(trace_id=123)
+def test_activate_distributed_headers_no_span_id_raises(llmobs):
+    with pytest.raises(Exception) as excinfo:
         llmobs.activate_distributed_headers({})
-        assert mock_extract.call_count == 1
-        mock_llmobs_logs.warning.assert_called_once_with("Failed to extract trace/span ID from request headers.")
+    assert str(excinfo.value) == "Failed to extract trace/span ID from request headers."
 
 
 def test_activate_distributed_headers_no_llmobs_parent_id_does_nothing(llmobs, mock_llmobs_logs):
     with mock.patch("ddtrace.llmobs._llmobs.HTTPPropagator.extract") as mock_extract:
         dummy_context = Context(trace_id=123, span_id=456)
         mock_extract.return_value = dummy_context
-        with mock.patch("ddtrace.llmobs.LLMObs._instance.tracer.context_provider.activate") as mock_activate:
-            llmobs.activate_distributed_headers({})
-            assert mock_extract.call_count == 1
-            mock_llmobs_logs.debug.assert_called_once_with("Failed to extract LLMObs parent ID from request headers.")
-            mock_activate.assert_called_once_with(dummy_context)
+        llmobs.activate_distributed_headers({})
+        mock_llmobs_logs.debug.assert_called_once_with("Failed to extract LLMObs parent ID from request headers.")
 
 
-def test_activate_distributed_headers_activates_context(llmobs, mock_llmobs_logs):
+def test_activate_distributed_headers_activates_context(llmobs):
     with mock.patch("ddtrace.llmobs._llmobs.HTTPPropagator.extract") as mock_extract:
-        dummy_context = Context(trace_id=123, span_id=456)
+        dummy_context = Context(trace_id=123, span_id=456, meta={PROPAGATED_PARENT_ID_KEY: "123"})
         mock_extract.return_value = dummy_context
         with mock.patch("ddtrace.llmobs.LLMObs._instance.tracer.context_provider.activate") as mock_activate:
             llmobs.activate_distributed_headers({})
@@ -1537,15 +1524,16 @@ def test_service_enable_does_not_start_evaluator_runner():
     llmobs_service.disable()
 
 
-def test_submit_evaluation_no_ml_app_raises_warning(llmobs, mock_llmobs_logs):
+def test_submit_evaluation_no_ml_app_raises_warning(llmobs):
     with override_global_config(dict(_llmobs_ml_app="")):
-        llmobs.submit_evaluation(
-            span={"span_id": "123", "trace_id": "456"},
-            label="toxicity",
-            metric_type="categorical",
-            value="high",
-        )
-        mock_llmobs_logs.warning.assert_called_once_with(
+        with pytest.raises(Exception) as excinfo:
+            llmobs.submit_evaluation(
+                span={"span_id": "123", "trace_id": "456"},
+                label="toxicity",
+                metric_type="categorical",
+                value="high",
+            )
+        assert str(excinfo.value) == (
            "ML App name is required for sending evaluation metrics. Evaluation metric data will not be sent. "
             "Ensure this configuration is set before running your application."
         )
@@ -1653,46 +1641,32 @@ def test_submit_evaluation_incorrect_score_value_type_raises_error(llmobs, mock_
 
 
 def test_submit_evaluation_invalid_tags_raises_warning(llmobs, mock_llmobs_logs):
-    llmobs.submit_evaluation(
-        span={"span_id": "123", "trace_id": "456"},
-        label="toxicity",
-        metric_type="categorical",
-        value="high",
-        tags=["invalid"],
-    )
-    mock_llmobs_logs.warning.assert_called_once_with("tags must be a dictionary of string key-value pairs.")
+    with pytest.raises(Exception) as excinfo:
+        llmobs.submit_evaluation(
+            span={"span_id": "123", "trace_id": "456"},
+            label="toxicity",
+            metric_type="categorical",
+            value="high",
+            tags=["invalid"],
+        )
+    assert str(excinfo.value) == "tags must be a dictionary of string key-value pairs."
 
 
 @pytest.mark.parametrize(
     "ddtrace_global_config",
     [dict(_llmobs_ml_app="test_app_name")],
 )
-def test_submit_evaluation_non_string_tags_raises_warning_but_still_submits(
-    llmobs, mock_llmobs_logs, mock_llmobs_eval_metric_writer
-):
-    llmobs.submit_evaluation(
-        span={"span_id": "123", "trace_id": "456"},
-        label="toxicity",
-        metric_type="categorical",
-        value="high",
-        tags={1: 2, "foo": "bar"},
-        ml_app="dummy",
-    )
-    mock_llmobs_logs.warning.assert_called_once_with(
-        "Failed to parse tags. Tags for evaluation metrics must be strings."
-    )
-    mock_llmobs_logs.reset_mock()
-    mock_llmobs_eval_metric_writer.enqueue.assert_called_with(
-        _expected_llmobs_eval_metric_event(
-            ml_app="dummy",
-            span_id="123",
-            trace_id="456",
+def test_submit_evaluation_non_string_tags_raises(llmobs):  # TODO(sabrenner): check if we're ok changing this behavior
+    with pytest.raises(Exception) as excinfo:
+        llmobs.submit_evaluation(
+            span={"span_id": "123", "trace_id": "456"},
             label="toxicity",
             metric_type="categorical",
-            categorical_value="high",
-            tags=["ddtrace.version:{}".format(ddtrace.__version__), "ml_app:dummy", "foo:bar"],
+            value="high",
+            tags={1: 2, "foo": "bar"},
+            ml_app="dummy",
         )
-    )
+    assert str(excinfo.value) == "Failed to parse tags. Tags for evaluation metrics must be strings."
 
 
 @pytest.mark.parametrize(
@@ -1834,40 +1808,18 @@ def test_submit_evaluation_metric_with_metadata_enqueues_metric(llmobs, mock_llm
             metadata={"foo": ["bar", "baz"]},
         )
     )
-    mock_llmobs_eval_metric_writer.reset()
-    llmobs.submit_evaluation(
-        span={"span_id": "123", "trace_id": "456"},
-        label="toxicity",
-        metric_type="categorical",
-        value="high",
-        tags={"foo": "bar", "bee": "baz", "ml_app": "ml_app_override"},
-        ml_app="ml_app_override",
-        metadata="invalid",
-    )
-    mock_llmobs_eval_metric_writer.enqueue.assert_called_with(
-        _expected_llmobs_eval_metric_event(
-            ml_app="ml_app_override",
-            span_id="123",
-            trace_id="456",
+
+
+def test_submit_evaluation_invalid_assessment_raises(llmobs):
+    with pytest.raises(Exception) as excinfo:
+        llmobs.submit_evaluation(
+            span={"span_id": "123", "trace_id": "456"},
             label="toxicity",
             metric_type="categorical",
-            categorical_value="high",
-            tags=["ddtrace.version:{}".format(ddtrace.__version__), "ml_app:ml_app_override", "foo:bar", "bee:baz"],
+            value="high",
+            assessment=True,
         )
-    )
-
-
-def test_submit_evaluation_invalid_assessment_raises_warning(llmobs, mock_llmobs_logs):
-    llmobs.submit_evaluation(
-        span={"span_id": "123", "trace_id": "456"},
-        label="toxicity",
-        metric_type="categorical",
-        value="high",
-        assessment=True,
-    )
-    mock_llmobs_logs.warning.assert_called_once_with(
-        "Failed to parse assessment. assessment must be either 'pass' or 'fail'."
-    )
+    assert str(excinfo.value) == "Failed to parse assessment. assessment must be either 'pass' or 'fail'."
 
 
 def test_submit_evaluation_enqueues_writer_with_assessment(llmobs, mock_llmobs_eval_metric_writer):
@@ -1902,7 +1854,7 @@ def test_submit_evaluation_enqueues_writer_with_assessment(llmobs, mock_llmobs_e
         value="high",
         tags={"foo": "bar", "bee": "baz", "ml_app": "ml_app_override"},
         ml_app="ml_app_override",
-        metadata="invalid",
+        metadata={"foo": ["bar", "baz"]},
         assessment="fail",
     )
     mock_llmobs_eval_metric_writer.enqueue.assert_called_with(
@@ -1914,20 +1866,22 @@ def test_submit_evaluation_enqueues_writer_with_assessment(llmobs, mock_llmobs_e
             metric_type="categorical",
             categorical_value="high",
             tags=["ddtrace.version:{}".format(ddtrace.__version__), "ml_app:ml_app_override", "foo:bar", "bee:baz"],
+            metadata={"foo": ["bar", "baz"]},
             assessment="fail",
         )
     )
 
 
 def test_submit_evaluation_invalid_reasoning_raises_warning(llmobs, mock_llmobs_logs):
-    llmobs.submit_evaluation(
-        span={"span_id": "123", "trace_id": "456"},
-        label="toxicity",
-        metric_type="categorical",
-        value="high",
-        reasoning=123,
-    )
-    mock_llmobs_logs.warning.assert_called_once_with("Failed to parse reasoning. reasoning must be a string.")
+    with pytest.raises(Exception) as excinfo:
+        llmobs.submit_evaluation(
+            span={"span_id": "123", "trace_id": "456"},
+            label="toxicity",
+            metric_type="categorical",
+            value="high",
+            reasoning=123,
+        )
+    assert str(excinfo.value) == "Failed to parse reasoning. reasoning must be a string."
 
 
 def test_submit_evaluation_for_enqueues_writer_with_reasoning(llmobs, mock_llmobs_eval_metric_writer):
@@ -1954,29 +1908,6 @@ def test_submit_evaluation_for_enqueues_writer_with_reasoning(llmob
             reasoning="the content of the message involved profanity",
         )
     )
-    mock_llmobs_eval_metric_writer.reset()
-    llmobs.submit_evaluation_for(
-        span={"span_id": "123", "trace_id": "456"},
-        label="toxicity",
-        metric_type="categorical",
-        value="low",
-        tags={"foo": "bar", "bee": "baz", "ml_app": "ml_app_override"},
-        ml_app="ml_app_override",
-        metadata="invalid",
-        reasoning="the content of the message did not involve profanity or hate speech or negativity",
-    )
-    mock_llmobs_eval_metric_writer.enqueue.assert_called_with(
-        _expected_llmobs_eval_metric_event(
-            ml_app="ml_app_override",
-            span_id="123",
-            trace_id="456",
-            label="toxicity",
-            metric_type="categorical",
-            categorical_value="low",
-            tags=["ddtrace.version:{}".format(ddtrace.__version__), "ml_app:ml_app_override", "foo:bar", "bee:baz"],
-            reasoning="the content of the message did not involve profanity or hate speech or negativity",
-        )
-    )
 
 
 def test_llmobs_parenting_with_root_apm_span(llmobs, tracer, llmobs_events):