Commit 03c93b7

fix(litellm): be defensive when appending to streamed chunks [backport 3.9] (#13734)
Backport 9d80223 from #13712 to 3.9.

This PR is a workaround for this [bug report](#13700) where LiteLLM users are running into an unhandled exception caused by trying to append a chunk choice to the list of streamed chunks at that choice's index. The root of the issue comes down to where the user sets the parameter for the number of choices in the streamed response. If the `n` parameter is set as a kwarg, there is no issue. However, if the parameter is set elsewhere (e.g. in the user's proxy config file), the `_streamed_chunks` list incorrectly expects 1 choice, which leads to an indexing error.

I was able to reproduce the issue with the following config and client request:

Config file

```
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openai/gpt-3.5-turbo
      api_key: "os.environ/OPENAI_API_KEY"
      n: 2
      temperature: 0.2
```

Client request

```
import os
import litellm
import asyncio
from litellm import acompletion

litellm.api_key = os.environ["OPENAI_API_KEY"]


async def acompletion_proxy():
    messages = [{"content": "What color is the sky?", "role": "user"}]
    response = await acompletion(
        model="gpt-3.5-turbo",
        messages=messages,
        api_base="http://0.0.0.0:4000/",
        stream=True,
    )
    async for item in response:
        print(item)


if __name__ == "__main__":
    asyncio.run(acompletion_proxy())
```

This led to the following error:

```
Traceback (most recent call last):
  File "/Users/nicole.cybul/Documents/ML Observability/scripts/integrations/_simple_litellm_script.py", line 14, in <module>
    for item in response:
  File "/Users/nicole.cybul/Documents/ML Observability/scripts/.venv/lib/python3.11/site-packages/ddtrace/contrib/internal/litellm/utils.py", line 63, in __iter__
    _loop_handler(chunk, self._streamed_chunks)
  File "/Users/nicole.cybul/Documents/ML Observability/scripts/.venv/lib/python3.11/site-packages/ddtrace/contrib/internal/litellm/utils.py", line 131, in _loop_handler
    streamed_chunks[choice.index].append(choice)
    ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^
IndexError: list index out of range
```

The fix in this PR uses a `defaultdict(list)`, which should be robust against these types of indexing issues. With this new fix, the same request leads to a successful response and this [trace](https://dd.datad0g.com/llm/traces?query=%40ml_app%3Anicole-test%20%40event_type%3Aspan%20%40parent_id%3Aundefined&agg_m=count&agg_m_source=base&agg_t=count&fromUser=true&llmPanels=%5B%7B%22t%22%3A%22sampleDetailPanel%22%2C%22rEID%22%3A%22AwAAAZeEzsIo5PnrZAAAABhBWmVFenNJb0FBRDZPcUs4ZV94bEFBQUEAAAAkZjE5Nzg0Y2UtZTFlNy00YzI2LTk5MWQtMjg3YmJlNGM2ZTllAAAAIg%22%7D%5D&spanId=15697199987942968683&start=1750278947467&end=1750279847467&paused=false) in the product.
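For illustration, here is a minimal sketch of the failure mode and the workaround; the variable names are hypothetical and this is not the integration's actual code path:

```python
from collections import defaultdict

# Hypothetical sketch: the old approach sized the buffer from the `n` kwarg,
# which silently falls back to 1 when `n` is only set in the proxy config.
n_from_kwargs = 1
old_streamed_chunks = [[] for _ in range(n_from_kwargs)]

# A streamed choice arriving with index 1 (because the proxy requested n: 2)
# overflows the single-slot list.
try:
    old_streamed_chunks[1].append("chunk for choice 1")
except IndexError as exc:
    print(f"old behavior: {exc}")  # list index out of range

# With defaultdict(list), any choice index lazily gets its own bucket.
new_streamed_chunks = defaultdict(list)
new_streamed_chunks[1].append("chunk for choice 1")
print(dict(new_streamed_chunks))  # {1: ['chunk for choice 1']}
```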
## Checklist

- [x] PR author has checked that all the criteria below are met
  - The PR description includes an overview of the change
  - The PR description articulates the motivation for the change
  - The change includes tests OR the PR description describes a testing strategy
  - The PR description notes risks associated with the change, if any
  - Newly-added code is easy to change
  - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
  - The change includes or references documentation updates if necessary
  - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist

- [x] Reviewer has checked that all the criteria below are met
  - Title is accurate
  - All changes are related to the pull request's stated goal
  - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
  - Testing strategy adequately addresses listed risks
  - Newly-added code is easy to change
  - Release note makes sense to a user of the library
  - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
  - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

Co-authored-by: ncybul <[email protected]>
1 parent abd4ad1 commit 03c93b7

File tree

3 files changed (+13, -7 lines)

ddtrace/contrib/internal/litellm/utils.py

Lines changed: 7 additions & 6 deletions
```diff
@@ -1,3 +1,4 @@
+from collections import defaultdict
 import sys
 
 import wrapt
@@ -20,10 +21,9 @@ def extract_host_tag(kwargs):
 class BaseTracedLiteLLMStream(wrapt.ObjectProxy):
     def __init__(self, wrapped, integration, span, kwargs):
         super().__init__(wrapped)
-        n = kwargs.get("n", 1) or 1
         self._dd_integration = integration
         self._span_info = [(span, kwargs)]
-        self._streamed_chunks = [[] for _ in range(n)]
+        self._streamed_chunks = defaultdict(list)
 
     def _add_router_span_info(self, span, kwargs, instance):
         """Handler to add router span to this streaming object.
@@ -127,8 +127,9 @@ def _loop_handler(chunk, streamed_chunks):
 
     When handling a streamed chat/completion response, this function is called for each chunk in the streamed response.
     """
-    for choice in chunk.choices:
-        streamed_chunks[choice.index].append(choice)
+    for choice in getattr(chunk, "choices", []):
+        choice_index = getattr(choice, "index", 0)
+        streamed_chunks[choice_index].append(choice)
     if getattr(chunk, "usage", None):
         streamed_chunks[0].insert(0, chunk)
 
@@ -138,11 +139,11 @@ def _process_finished_stream(integration, span, kwargs, streamed_chunks, operati
     formatted_completions = None
     if integration.is_completion_operation(operation):
         formatted_completions = [
-            openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks
+            openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks.values()
         ]
     else:
         formatted_completions = [
-            openai_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks
+            openai_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks.values()
         ]
     if integration.is_pc_sampled_llmobs(span):
         integration.llmobs_set_tags(
```
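As a rough illustration of how the updated `_loop_handler` behaves with the `defaultdict`, here is a self-contained sketch; the `SimpleNamespace` chunks are stand-ins for illustration only, not LiteLLM's real streamed chunk objects:

```python
from collections import defaultdict
from types import SimpleNamespace

# Sketch of the updated _loop_handler logic from the diff above.
def _loop_handler(chunk, streamed_chunks):
    for choice in getattr(chunk, "choices", []):
        choice_index = getattr(choice, "index", 0)
        streamed_chunks[choice_index].append(choice)
    if getattr(chunk, "usage", None):
        streamed_chunks[0].insert(0, chunk)

streamed_chunks = defaultdict(list)

# Two choices per chunk, as produced when the proxy config sets n: 2.
chunk = SimpleNamespace(
    choices=[
        SimpleNamespace(index=0, delta="The sky"),
        SimpleNamespace(index=1, delta="It is"),
    ],
    usage=None,
)
_loop_handler(chunk, streamed_chunks)

# A chunk with no choices attribute no longer raises; it is simply skipped.
_loop_handler(SimpleNamespace(usage=None), streamed_chunks)
print({i: len(chunks) for i, chunks in streamed_chunks.items()})  # {0: 1, 1: 1}
```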

ddtrace/llmobs/_integrations/utils.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -401,7 +401,7 @@ def openai_construct_completion_from_streamed_chunks(streamed_chunks: List[Any])
     if not streamed_chunks:
         return {"text": ""}
     completion = {"text": "".join(c.text for c in streamed_chunks if getattr(c, "text", None))}
-    if streamed_chunks[-1].finish_reason is not None:
+    if getattr(streamed_chunks[-1], "finish_reason", None):
         completion["finish_reason"] = streamed_chunks[-1].finish_reason
     if hasattr(streamed_chunks[0], "usage"):
         completion["usage"] = streamed_chunks[0].usage
```
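For context, a small hypothetical illustration of why the `getattr` check above is more defensive than direct attribute access (the stand-in object here is not LiteLLM's chunk type):

```python
from types import SimpleNamespace

# A final streamed chunk that carries no finish_reason attribute at all.
last_chunk = SimpleNamespace(text="done")

# Direct attribute access would raise AttributeError:
#     last_chunk.finish_reason
# The getattr-based check falls back to None, so the completion dict is
# simply built without a finish_reason instead of crashing.
completion = {"text": last_chunk.text}
if getattr(last_chunk, "finish_reason", None):
    completion["finish_reason"] = last_chunk.finish_reason
print(completion)  # {'text': 'done'}
```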
Lines changed: 5 additions & 0 deletions
```diff
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    litellm: This fix resolves an out of bounds error when handling streamed responses.
+    This error occurred when the number of choices in a streamed response was not set as a keyword argument.
```
