Merge pull request #14637 from akraines/feature/middle-truncate-spend-logs

krrishdholakia · web-flow · commit ff36dfdc76e4 · 2025-09-17T23:47:04.000-07:00
feat: implement middle-truncation for spend log payloads
diff --git a/litellm/constants.py b/litellm/constants.py
@@ -179,7 +179,7 @@
     os.getenv("NON_LLM_CONNECTION_TIMEOUT", 15)
 )  # timeout for adjacent services (e.g. jwt auth)
 MAX_EXCEPTION_MESSAGE_LENGTH = int(os.getenv("MAX_EXCEPTION_MESSAGE_LENGTH", 2000))
-MAX_STRING_LENGTH_PROMPT_IN_DB = int(os.getenv("MAX_STRING_LENGTH_PROMPT_IN_DB", 1000))
+MAX_STRING_LENGTH_PROMPT_IN_DB = int(os.getenv("MAX_STRING_LENGTH_PROMPT_IN_DB", 2048))
 BEDROCK_MAX_POLICY_SIZE = int(os.getenv("BEDROCK_MAX_POLICY_SIZE", 75))
 REPLICATE_POLLING_DELAY_SECONDS = float(
     os.getenv("REPLICATE_POLLING_DELAY_SECONDS", 0.5)
diff --git a/litellm/proxy/spend_tracking/spend_tracking_utils.py b/litellm/proxy/spend_tracking/spend_tracking_utils.py
@@ -501,7 +501,34 @@ def _sanitize_value(value: Any) -> Any:
             return [_sanitize_value(item) for item in value]
         elif isinstance(value, str):
             if len(value) > MAX_STRING_LENGTH_PROMPT_IN_DB:
-                return f"{value[:MAX_STRING_LENGTH_PROMPT_IN_DB]}... ({LITELLM_TRUNCATED_PAYLOAD_FIELD} {len(value) - MAX_STRING_LENGTH_PROMPT_IN_DB} chars)"
+                # Keep 35% from beginning and 65% from end (end is usually more important)
+                # This split ensures we keep more context from the end of conversations
+                start_ratio = 0.35
+                end_ratio = 0.65
+                
+                # Calculate character distribution
+                start_chars = int(MAX_STRING_LENGTH_PROMPT_IN_DB * start_ratio)
+                end_chars = int(MAX_STRING_LENGTH_PROMPT_IN_DB * end_ratio)
+                
+                # Ensure we don't exceed the total limit
+                total_keep = start_chars + end_chars
+                if total_keep > MAX_STRING_LENGTH_PROMPT_IN_DB:
+                    end_chars = MAX_STRING_LENGTH_PROMPT_IN_DB - start_chars
+                
+                # Defensive guard: a string at or under the limit is returned unchanged (the enclosing length check already excludes this case)
+                if len(value) <= MAX_STRING_LENGTH_PROMPT_IN_DB:
+                    return value
+                
+                # Calculate how many characters are being skipped
+                skipped_chars = len(value) - total_keep
+                
+                # Build the truncated string: beginning + truncation marker + end
+                truncated_value = (
+                    f"{value[:start_chars]}"
+                    f"... ({LITELLM_TRUNCATED_PAYLOAD_FIELD} skipped {skipped_chars} chars) ..."
+                    f"{value[-end_chars:]}"
+                )
+                return truncated_value
             return value
         return value
 
diff --git a/tests/logging_callback_tests/test_spend_logs.py b/tests/logging_callback_tests/test_spend_logs.py
@@ -401,11 +401,16 @@ def test_spend_logs_payload_with_prompts_enabled(monkeypatch):
 def test_large_request_no_truncation_threshold():
     """
     Test that MAX_STRING_LENGTH_PROMPT_IN_DB constant is used for request body sanitization
+    and that the new truncation logic keeps beginning (35%) and end (65%) of the string
     """
     from litellm.constants import MAX_STRING_LENGTH_PROMPT_IN_DB, LITELLM_TRUNCATED_PAYLOAD_FIELD
     
     # Create a large string that exceeds the threshold
-    large_content = "x" * (MAX_STRING_LENGTH_PROMPT_IN_DB + 500)
+    # Use a pattern that allows us to verify beginning and end are preserved
+    start_pattern = "START" * 250  # 1250 chars
+    middle_pattern = "MIDDLE" * 200  # 1200 chars
+    end_pattern = "END" * 250  # 750 chars
+    large_content = start_pattern + middle_pattern + end_pattern
     
     request_body = {
         "messages": [
@@ -418,10 +423,20 @@ def test_large_request_no_truncation_threshold():
     
     # Verify the content was truncated
     truncated_content = sanitized["messages"][0]["content"]
-    assert len(truncated_content) > MAX_STRING_LENGTH_PROMPT_IN_DB  # includes truncation message
-    assert truncated_content.startswith("x" * MAX_STRING_LENGTH_PROMPT_IN_DB)
+    
+    # Calculate expected character counts (35% start, 65% end)
+    expected_start_chars = int(MAX_STRING_LENGTH_PROMPT_IN_DB * 0.35)
+    expected_end_chars = int(MAX_STRING_LENGTH_PROMPT_IN_DB * 0.65)
+    
+    # Should keep first 35% of MAX_STRING_LENGTH_PROMPT_IN_DB chars
+    assert truncated_content.startswith(large_content[:expected_start_chars])
+    
+    # Should keep last 65% of MAX_STRING_LENGTH_PROMPT_IN_DB chars
+    assert truncated_content.endswith(large_content[-expected_end_chars:])
+    
+    # Should have truncation marker
     assert LITELLM_TRUNCATED_PAYLOAD_FIELD in truncated_content
-    assert "500 chars" in truncated_content
+    assert "skipped" in truncated_content
 
 
 def test_small_request_no_truncation():
@@ -452,7 +467,7 @@ def test_configurable_string_length_env_var(monkeypatch):
     Test that MAX_STRING_LENGTH_PROMPT_IN_DB can be configured via environment variable
     """
     # Set environment variable to a custom value
-    monkeypatch.setenv("MAX_STRING_LENGTH_PROMPT_IN_DB", "500")
+    monkeypatch.setenv("MAX_STRING_LENGTH_PROMPT_IN_DB", "1000")
     
     # Import after setting env var to ensure it picks up the new value
     import importlib
@@ -465,10 +480,43 @@ def test_configurable_string_length_env_var(monkeypatch):
     from litellm.proxy.spend_tracking.spend_tracking_utils import _sanitize_request_body_for_spend_logs_payload
     
     # Verify the constant was set to the env var value
-    assert MAX_STRING_LENGTH_PROMPT_IN_DB == 500
+    assert MAX_STRING_LENGTH_PROMPT_IN_DB == 1000
     
     # Test truncation with the custom value
-    large_content = "y" * 750  # 250 chars over the custom limit
+    large_content = "A" * 500 + "B" * 800 + "C" * 500  # 1800 chars total
+    
+    request_body = {
+        "messages": [
+            {"role": "user", "content": large_content}
+        ],
+        "model": "gpt-4"
+    }
+    
+    sanitized = _sanitize_request_body_for_spend_logs_payload(request_body)
+    
+    # Verify truncation occurred with 35% beginning and 65% end preserved
+    truncated_content = sanitized["messages"][0]["content"]
+    expected_start = int(1000 * 0.35)  # 350 chars from beginning
+    expected_end = int(1000 * 0.65)    # 650 chars from end
+    
+    assert truncated_content.startswith(large_content[:expected_start])
+    assert truncated_content.endswith(large_content[-expected_end:])
+    assert LITELLM_TRUNCATED_PAYLOAD_FIELD in truncated_content
+    assert "skipped" in truncated_content
+    assert "800" in truncated_content  # Should mention skipped 800 chars
+
+
+def test_truncation_preserves_beginning_and_end():
+    """
+    Test that truncation preserves the beginning (35%) and end (65%) of content for better debugging
+    """
+    from litellm.constants import MAX_STRING_LENGTH_PROMPT_IN_DB, LITELLM_TRUNCATED_PAYLOAD_FIELD
+    
+    # Create content with distinct beginning, middle, and end
+    beginning = "BEGIN_" * 200  # 1200 chars
+    middle = "MIDDLE_" * 300  # 2100 chars
+    end = "_END" * 300  # 1200 chars
+    large_content = beginning + middle + end
     
     request_body = {
         "messages": [
@@ -478,9 +526,26 @@ def test_configurable_string_length_env_var(monkeypatch):
     }
     
     sanitized = _sanitize_request_body_for_spend_logs_payload(request_body)
+    truncated_content = sanitized["messages"][0]["content"]
+    
+    # Calculate expected splits (35% beginning, 65% end)
+    expected_start_chars = int(MAX_STRING_LENGTH_PROMPT_IN_DB * 0.35)
+    expected_end_chars = int(MAX_STRING_LENGTH_PROMPT_IN_DB * 0.65)
     
-    # Verify truncation occurred at the custom threshold
-    truncated_content = sanitized["messages"][0]["content"] 
-    assert truncated_content.startswith("y" * 500)
+    # Check that beginning is preserved
+    expected_beginning = large_content[:expected_start_chars]
+    assert truncated_content.startswith(expected_beginning)
+    
+    # Check that end is preserved
+    expected_end = large_content[-expected_end_chars:]
+    assert truncated_content.endswith(expected_end)
+    
+    # Check truncation marker is present
     assert LITELLM_TRUNCATED_PAYLOAD_FIELD in truncated_content
-    assert "250 chars" in truncated_content
+    assert "skipped" in truncated_content
+    
+    # Calculate expected skipped chars
+    total_chars = len(large_content)
+    kept_chars = expected_start_chars + expected_end_chars
+    expected_skipped = total_chars - kept_chars
+    assert str(expected_skipped) in truncated_content