fix(sdk): respect truncation otel environment variable (#3212)

nirga · web-flow · commit c0dd1c8c9195 · 2025-08-03T20:06:36.000+02:00
diff --git a/packages/traceloop-sdk/tests/test_tasks.py b/packages/traceloop-sdk/tests/test_tasks.py
@@ -153,3 +153,129 @@ def dataclass_task(data: TestDataClass):
         "field1": "value1",
         "field2": 123,
     }
+
+
+def test_json_truncation_with_otel_limit(exporter, monkeypatch):
+    """Entity input/output JSON is cut to OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT characters."""
+    # A 50-character limit is far shorter than either serialized payload below.
+    monkeypatch.setenv("OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT", "50")
+
+    @task(name="truncation_task")
+    def truncation_task(long_input):
+        # The return value is longer than the limit as well.
+        return "This is a very long output string that should definitely exceed the 50 character limit"
+
+    # The argument alone already exceeds the limit.
+    oversized_arg = "This is a very long input string that should definitely exceed the 50 character limit"
+    truncation_task(oversized_arg)
+
+    # The first finished span is the one inspected.
+    attrs = exporter.get_finished_spans()[0].attributes
+
+    # Recorded input: exactly 50 characters, matching the head of the serialized arguments.
+    recorded_input = attrs[SpanAttributes.TRACELOOP_ENTITY_INPUT]
+    assert len(recorded_input) == 50
+    assert recorded_input.startswith('{"args": ["This is a very long input string that s')
+
+    # Recorded output: exactly 50 characters, matching the head of the serialized return value.
+    recorded_output = attrs[SpanAttributes.TRACELOOP_ENTITY_OUTPUT]
+    assert len(recorded_output) == 50
+    assert recorded_output.startswith('"This is a very long output string that should def')
+
+
+def test_json_no_truncation_without_otel_limit(exporter, monkeypatch):
+    """With OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT unset, entity input/output JSON is stored whole."""
+    # Drop the variable if the surrounding environment happens to define it.
+    monkeypatch.delenv("OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT", raising=False)
+
+    @task(name="no_truncation_task")
+    def no_truncation_task(long_input):
+        return "This is a very long output string that would be truncated if limits were set but should remain intact"
+
+    full_arg = "This is a very long input string that would be truncated if limits were set but should remain intact"
+    returned = no_truncation_task(full_arg)
+
+    # The first finished span is the one inspected.
+    attrs = exporter.get_finished_spans()[0].attributes
+
+    # The stored input parses as JSON and carries the whole argument.
+    parsed_input = json.loads(attrs[SpanAttributes.TRACELOOP_ENTITY_INPUT])
+    assert parsed_input["args"][0] == full_arg
+
+    # The stored output parses as JSON and equals the task's return value.
+    parsed_output = json.loads(attrs[SpanAttributes.TRACELOOP_ENTITY_OUTPUT])
+    assert parsed_output == returned
+
+
+def test_json_truncation_with_invalid_otel_limit(exporter, monkeypatch):
+    """A non-numeric OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT leaves entity input/output JSON untouched."""
+    # "not_a_number" cannot be parsed as an integer limit.
+    monkeypatch.setenv("OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT", "not_a_number")
+
+    @task(name="invalid_limit_task")
+    def invalid_limit_task(test_input):
+        return "This output should not be truncated because the limit is invalid"
+
+    plain_arg = "This input should not be truncated because the limit is invalid"
+    returned = invalid_limit_task(plain_arg)
+
+    # The first finished span is the one inspected.
+    attrs = exporter.get_finished_spans()[0].attributes
+
+    # The stored input still round-trips through JSON with the whole argument.
+    parsed_input = json.loads(attrs[SpanAttributes.TRACELOOP_ENTITY_INPUT])
+    assert parsed_input["args"][0] == plain_arg
+
+    # The stored output still round-trips through JSON to the return value.
+    parsed_output = json.loads(attrs[SpanAttributes.TRACELOOP_ENTITY_OUTPUT])
+    assert parsed_output == returned
+
+
+@pytest.mark.asyncio
+async def test_async_json_truncation_with_otel_limit(exporter, monkeypatch):
+    """Entity input/output JSON of an async task is cut to the configured length limit."""
+    # A 40-character limit is shorter than either serialized payload below.
+    monkeypatch.setenv("OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT", "40")
+
+    @task(name="async_truncation_task")
+    async def async_truncation_task(long_input):
+        await asyncio.sleep(0.1)  # Suspend once before returning
+        return "This is a long async output that should be truncated"
+
+    oversized_arg = "This is a long async input that should be truncated"
+    await async_truncation_task(oversized_arg)
+
+    # The first finished span is the one inspected.
+    attrs = exporter.get_finished_spans()[0].attributes
+
+    # Recorded input is exactly as long as the limit.
+    recorded_input = attrs[SpanAttributes.TRACELOOP_ENTITY_INPUT]
+    assert len(recorded_input) == 40
+
+    # Recorded output is exactly as long as the limit.
+    recorded_output = attrs[SpanAttributes.TRACELOOP_ENTITY_OUTPUT]
+    assert len(recorded_output) == 40
+
+
+def test_json_truncation_preserves_short_content(exporter, monkeypatch):
+    """Payloads shorter than OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT are recorded unchanged."""
+    # A 1000-character limit is well above the size of the payloads below.
+    monkeypatch.setenv("OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT", "1000")
+
+    @task(name="short_content_task")
+    def short_content_task(short_input):
+        return "short output"
+
+    small_arg = "short input"
+    returned = short_content_task(small_arg)
+
+    # The first finished span is the one inspected.
+    attrs = exporter.get_finished_spans()[0].attributes
+
+    # The stored input parses as JSON and carries the whole argument.
+    parsed_input = json.loads(attrs[SpanAttributes.TRACELOOP_ENTITY_INPUT])
+    assert parsed_input["args"][0] == small_arg
+
+    # The stored output parses as JSON and equals the task's return value.
+    parsed_output = json.loads(attrs[SpanAttributes.TRACELOOP_ENTITY_OUTPUT])
+    assert parsed_output == returned
diff --git a/packages/traceloop-sdk/traceloop/sdk/decorators/base.py b/packages/traceloop-sdk/traceloop/sdk/decorators/base.py
@@ -35,9 +35,20 @@
 F = TypeVar("F", bound=Callable[P, R | Awaitable[R]])
 
 
-def _is_json_size_valid(json_str: str) -> bool:
-    """Check if JSON string size is less than 1MB"""
-    return len(json_str) < 1_000_000
+def _truncate_json_if_needed(json_str: str) -> str:
+    """
+    Cap a JSON string at OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT characters when that variable is set;
+    the cut may leave an invalid JSON string, which is expected for logging purposes.
+    """
+    raw_limit = os.getenv("OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT")
+    if not raw_limit:
+        return json_str
+    try:
+        max_len = int(raw_limit)
+    except ValueError:
+        # A value that is not an integer is ignored and the string is returned whole.
+        return json_str
+    return json_str[:max_len] if 0 < max_len < len(json_str) else json_str
 
 
 # Async Decorators - Deprecated
@@ -163,11 +174,11 @@ def _handle_span_input(span, args, kwargs, cls=None):
             json_input = json.dumps(
                 {"args": args, "kwargs": kwargs}, **({"cls": cls} if cls else {})
             )
-            if _is_json_size_valid(json_input):
-                span.set_attribute(
-                    SpanAttributes.TRACELOOP_ENTITY_INPUT,
-                    json_input,
-                )
+            truncated_json = _truncate_json_if_needed(json_input)
+            span.set_attribute(
+                SpanAttributes.TRACELOOP_ENTITY_INPUT,
+                truncated_json,
+            )
     except TypeError as e:
         Telemetry().log_exception(e)
 
@@ -177,11 +188,11 @@ def _handle_span_output(span, res, cls=None):
     try:
         if _should_send_prompts():
             json_output = json.dumps(res, **({"cls": cls} if cls else {}))
-            if _is_json_size_valid(json_output):
-                span.set_attribute(
-                    SpanAttributes.TRACELOOP_ENTITY_OUTPUT,
-                    json_output,
-                )
+            truncated_json = _truncate_json_if_needed(json_output)
+            span.set_attribute(
+                SpanAttributes.TRACELOOP_ENTITY_OUTPUT,
+                truncated_json,
+            )
     except TypeError as e:
         Telemetry().log_exception(e)