Skip to content

Commit ab554a7

Browse files
authored
chore(openai): refactor streamed response handling to the tracing plugin (#6107)
* refactor to tracing plugin
1 parent f8434e9 commit ab554a7

File tree

3 files changed

+166
-114
lines changed

3 files changed

+166
-114
lines changed

packages/datadog-instrumentations/src/openai.js

Lines changed: 13 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ const shimmer = require('../../datadog-shimmer')
55

66
const dc = require('dc-polyfill')
77
const ch = dc.tracingChannel('apm:openai:request')
8+
const onStreamedChunkCh = dc.channel('apm:openai:request:chunk')
89

910
const V4_PACKAGE_SHIMS = [
1011
{
@@ -160,119 +161,24 @@ addHook({ name: 'openai', file: 'dist/api.js', versions: ['>=3.0.0 <4'] }, expor
160161
return exports
161162
})
162163

163-
function addStreamedChunk (content, chunk) {
164-
content.usage = chunk.usage // add usage if it was specified to be returned
165-
for (const choice of chunk.choices) {
166-
const choiceIdx = choice.index
167-
const oldChoice = content.choices.find(choice => choice?.index === choiceIdx)
168-
if (oldChoice) {
169-
if (!oldChoice.finish_reason) {
170-
oldChoice.finish_reason = choice.finish_reason
171-
}
172-
173-
// delta exists on chat completions
174-
const delta = choice.delta
175-
176-
if (delta) {
177-
const content = delta.content
178-
if (content) {
179-
if (oldChoice.delta.content) { // we don't want to append to undefined
180-
oldChoice.delta.content += content
181-
} else {
182-
oldChoice.delta.content = content
183-
}
184-
}
185-
} else {
186-
const text = choice.text
187-
if (text) {
188-
if (oldChoice.text) {
189-
oldChoice.text += text
190-
} else {
191-
oldChoice.text = text
192-
}
193-
}
194-
}
195-
196-
// tools only exist on chat completions
197-
const tools = delta && choice.delta.tool_calls
198-
199-
if (tools) {
200-
oldChoice.delta.tool_calls = tools.map((newTool, toolIdx) => {
201-
const oldTool = oldChoice.delta.tool_calls?.[toolIdx]
202-
203-
if (oldTool) {
204-
oldTool.function.arguments += newTool.function.arguments
205-
return oldTool
206-
}
207-
208-
return newTool
209-
})
210-
}
211-
} else {
212-
// we don't know which choices arrive in which order
213-
content.choices[choiceIdx] = choice
214-
}
215-
}
216-
}
217-
218-
function convertBufferstoObjects (chunks) {
219-
return Buffer
220-
.concat(chunks) // combine the buffers
221-
.toString() // stringify
222-
.split(/(?=data:)/) // split on "data:"
223-
.map(chunk => chunk.replaceAll('\n', '').slice(6)) // remove newlines and 'data: ' from the front
224-
.slice(0, -1) // remove the last [DONE] message
225-
.map(JSON.parse) // parse all of the returned objects
226-
}
227-
228164
/**
229165
* For streamed responses, we need to accumulate all of the content in
230166
* the chunks, and let the combined content be the final response.
231167
* This way, spans look the same as when not streamed.
232168
*/
233-
function wrapStreamIterator (response, options, n, ctx) {
234-
let processChunksAsBuffers = false
235-
let chunks = []
169+
function wrapStreamIterator (response, options, ctx) {
236170
return function (itr) {
237171
return function () {
238172
const iterator = itr.apply(this, arguments)
239173
shimmer.wrap(iterator, 'next', next => function () {
240174
return next.apply(this, arguments)
241175
.then(res => {
242176
const { done, value: chunk } = res
243-
244-
if (chunk) {
245-
chunks.push(chunk)
246-
// TODO(BridgeAR): It likely depends on the options being passed
247-
// through if the stream returns buffers or not. By reading that,
248-
// we don't have to do the instanceof check anymore, which is
249-
// relatively expensive.
250-
if (chunk instanceof Buffer) {
251-
// this operation should be safe
252-
// if one chunk is a buffer (versus a plain object), the rest should be as well
253-
processChunksAsBuffers = true
254-
}
255-
}
177+
onStreamedChunkCh.publish({ ctx, chunk, done })
256178

257179
if (done) {
258-
let body = {}
259-
if (processChunksAsBuffers) {
260-
chunks = convertBufferstoObjects(chunks)
261-
}
262-
263-
if (chunks.length) {
264-
// Define the initial body having all the content outside of choices from the first chunk
265-
// this will include important data like created, id, model, etc.
266-
body = { ...chunks[0], choices: Array.from({ length: n }) }
267-
// Start from the first chunk, and add its choices into the body
268-
for (const chunk_ of chunks) {
269-
addStreamedChunk(body, chunk_)
270-
}
271-
}
272-
273180
finish(ctx, {
274181
headers: response.headers,
275-
data: body,
276182
request: {
277183
path: response.url,
278184
method: options.method
@@ -312,17 +218,6 @@ for (const extension of extensions) {
312218
// chat.completions and completions
313219
const stream = streamedResponse && getOption(arguments, 'stream', false)
314220

315-
// we need to compute how many prompts we are sending in streamed cases for completions
316-
// not applicable for chat completions
317-
let n
318-
if (stream) {
319-
n = getOption(arguments, 'n', 1)
320-
const prompt = getOption(arguments, 'prompt')
321-
if (Array.isArray(prompt) && typeof prompt[0] !== 'number') {
322-
n *= prompt.length
323-
}
324-
}
325-
326221
const client = this._client || this.client
327222

328223
const ctx = {
@@ -348,7 +243,7 @@ for (const extension of extensions) {
348243
const parsedPromise = origApiPromParse.apply(this, arguments)
349244
.then(body => Promise.all([this.responsePromise, body]))
350245

351-
return handleUnwrappedAPIPromise(parsedPromise, ctx, stream, n)
246+
return handleUnwrappedAPIPromise(parsedPromise, ctx, stream)
352247
})
353248

354249
return unwrappedPromise
@@ -361,7 +256,7 @@ for (const extension of extensions) {
361256
const parsedPromise = origApiPromParse.apply(this, arguments)
362257
.then(body => Promise.all([this.responsePromise, body]))
363258

364-
return handleUnwrappedAPIPromise(parsedPromise, ctx, stream, n)
259+
return handleUnwrappedAPIPromise(parsedPromise, ctx, stream)
365260
})
366261

367262
ch.end.publish(ctx)
@@ -375,15 +270,15 @@ for (const extension of extensions) {
375270
}
376271
}
377272

378-
function handleUnwrappedAPIPromise (apiProm, ctx, stream, n) {
273+
function handleUnwrappedAPIPromise (apiProm, ctx, stream) {
379274
return apiProm
380275
.then(([{ response, options }, body]) => {
381276
if (stream) {
382277
if (body.iterator) {
383-
shimmer.wrap(body, 'iterator', wrapStreamIterator(response, options, n, ctx))
278+
shimmer.wrap(body, 'iterator', wrapStreamIterator(response, options, ctx))
384279
} else {
385280
shimmer.wrap(
386-
body.response.body, Symbol.asyncIterator, wrapStreamIterator(response, options, n, ctx)
281+
body.response.body, Symbol.asyncIterator, wrapStreamIterator(response, options, ctx)
387282
)
388283
}
389284
} else {
@@ -412,7 +307,11 @@ function finish (ctx, response, error) {
412307
ch.error.publish(ctx)
413308
}
414309

415-
ctx.result = response
310+
// for successful streamed responses, we've already set the result on ctx.body,
311+
// so we don't want to override it here
312+
ctx.result ??= {}
313+
Object.assign(ctx.result, response)
314+
416315
ch.asyncEnd.publish(ctx)
417316
}
418317

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
'use strict'
2+
3+
/**
 * Combines legacy OpenAI streamed chunks into a single object.
 * These legacy chunks are returned as buffers instead of individual objects.
 * @param {readonly Uint8Array[]} chunks
 * @returns {Array<Record<string, any>>}
 */
function convertBuffersToObjects (chunks) {
  const combined = Buffer.concat(chunks).toString()
  // each SSE event starts with "data: "; the lookahead split keeps that
  // prefix attached to the front of every piece
  const events = combined.split(/(?=data:)/)
  // strip newlines and the 6-character "data: " prefix from each event
  const payloads = events.map(piece => piece.replaceAll('\n', '').slice(6))
  // the final event is the "[DONE]" sentinel — drop it, then parse the rest
  return payloads.slice(0, -1).map(payload => JSON.parse(payload))
}
18+
19+
/**
 * Constructs the entire response from a stream of OpenAI completion chunks,
 * mainly combining the text choices of each chunk into a single string per choice.
 * Choices are copied before accumulation so the caller-owned chunk objects
 * (the same objects yielded to the application) are never mutated.
 * @param {Array<Record<string, any>>} chunks
 * @param {number} n the number of choices to expect in the response
 * @returns {Record<string, any>}
 */
function constructCompletionResponseFromStreamedChunks (chunks, n) {
  // seed the body with top-level metadata (id, model, created, ...) from the
  // first chunk; choices start as an n-slot sparse array filled below
  const body = { ...chunks[0], choices: Array.from({ length: n }) }

  for (const chunk of chunks) {
    body.usage = chunk.usage // usage, when requested, arrives on the last chunk
    for (const choice of chunk.choices) {
      const choiceIdx = choice.index
      const oldChoice = body.choices.find(c => c?.index === choiceIdx)
      if (!oldChoice) {
        // chunks may arrive in any choice order — copy so the text
        // concatenation below never mutates the caller's chunk objects
        body.choices[choiceIdx] = { ...choice }
        continue
      }

      // keep the first non-empty finish_reason seen for this choice
      if (!oldChoice.finish_reason) {
        oldChoice.finish_reason = choice.finish_reason
      }

      if (choice.text) {
        // don't append to undefined on the first text fragment
        oldChoice.text = (oldChoice.text || '') + choice.text
      }
    }
  }

  return body
}
55+
56+
/**
 * Constructs the entire response from a stream of OpenAI chat completion chunks,
 * mainly combining the text choices of each chunk into a single string per choice.
 * Choices (and their nested delta/tool_calls) are copied before accumulation so
 * the caller-owned chunk objects (the same objects yielded to the application)
 * are never mutated.
 * @param {Array<Record<string, any>>} chunks
 * @param {number} n the number of choices to expect in the response
 * @returns {Record<string, any>}
 */
function constructChatCompletionResponseFromStreamedChunks (chunks, n) {
  // copy a streamed tool call so argument concatenation below cannot
  // write into the caller's chunk objects
  const copyTool = tool => ({
    ...tool,
    function: tool.function && { ...tool.function }
  })

  // copy a choice together with its nested delta and tool_calls
  const copyChoice = choice => {
    const copy = { ...choice }
    if (choice.delta) {
      copy.delta = { ...choice.delta }
      if (choice.delta.tool_calls) {
        copy.delta.tool_calls = choice.delta.tool_calls.map(copyTool)
      }
    }
    return copy
  }

  // seed the body with top-level metadata (id, model, created, ...) from the
  // first chunk; choices start as an n-slot sparse array filled below
  const body = { ...chunks[0], choices: Array.from({ length: n }) }

  for (const chunk of chunks) {
    body.usage = chunk.usage // usage, when requested, arrives on the last chunk
    for (const choice of chunk.choices) {
      const choiceIdx = choice.index
      const oldChoice = body.choices.find(c => c?.index === choiceIdx)
      if (!oldChoice) {
        // chunks may arrive in any choice order
        body.choices[choiceIdx] = copyChoice(choice)
        continue
      }

      // keep the first non-empty finish_reason seen for this choice
      if (!oldChoice.finish_reason) {
        oldChoice.finish_reason = choice.finish_reason
      }

      const delta = choice.delta
      if (!delta) continue

      if (delta.content) {
        // don't append to undefined on the first content fragment
        oldChoice.delta.content = (oldChoice.delta.content || '') + delta.content
      }

      // tool calls only exist on chat completions
      const tools = delta.tool_calls
      if (!tools) continue

      oldChoice.delta.tool_calls = tools.map((newTool, toolIdx) => {
        const oldTool = oldChoice.delta.tool_calls?.[toolIdx]
        if (oldTool) {
          // oldTool is our own copy, so in-place concatenation is safe
          oldTool.function.arguments += newTool.function.arguments
          return oldTool
        }
        return copyTool(newTool)
      })
    }
  }

  return body
}
108+
109+
// Helpers consumed by the OpenAI tracing plugin (see tracing.js) to rebuild
// a complete response body from accumulated streamed chunks.
module.exports = {
  convertBuffersToObjects,
  constructCompletionResponseFromStreamedChunks,
  constructChatCompletionResponseFromStreamedChunks
}

packages/datadog-plugin-openai/src/tracing.js

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ const { MEASURED } = require('../../../ext/tags')
1010
const { estimateTokens } = require('./token-estimator')
1111

1212
const makeUtilities = require('../../dd-trace/src/plugins/util/llm')
13+
const {
14+
convertBuffersToObjects,
15+
constructCompletionResponseFromStreamedChunks,
16+
constructChatCompletionResponseFromStreamedChunks
17+
} = require('./stream-helpers')
1318

1419
let normalize
1520

@@ -48,6 +53,41 @@ class OpenAiTracingPlugin extends TracingPlugin {
4853

4954
normalize = utilities.normalize
5055
}
56+
57+
// Accumulates streamed chunks published by the openai instrumentation and,
// once the stream is done, rebuilds the combined response body so streamed
// spans look the same as non-streamed ones.
this.addSub('apm:openai:request:chunk', ({ ctx, chunk, done }) => {
  // chunks are collected on the shared tracing context across publishes
  if (!ctx.chunks) ctx.chunks = []

  if (chunk) ctx.chunks.push(chunk)
  if (!done) return

  let chunks = ctx.chunks
  if (chunks.length === 0) return

  const firstChunk = chunks[0]
  // TODO(BridgeAR): It likely depends on the options being passed
  // through if the stream returns buffers or not. By reading that,
  // we don't have to do the instanceof check anymore, which is
  // relatively expensive.
  if (firstChunk instanceof Buffer) {
    // legacy streams yield raw SSE buffers rather than parsed objects;
    // if one chunk is a buffer, the rest are assumed to be as well
    chunks = convertBuffersToObjects(chunks)
  }

  const methodName = ctx.currentStore.normalizedMethodName

  // For completions, the response carries one choice per prompt entry.
  // NOTE(review): assumes ctx.args[0] is the request options object — confirm
  // against the instrumentation that publishes this channel.
  // NOTE(review): the previous instrumentation also multiplied by the
  // request's `n` option (getOption(arguments, 'n', 1)); that factor is no
  // longer applied here — confirm this is intentional.
  let n = 1
  const prompt = ctx.args[0].prompt
  if (Array.isArray(prompt) && typeof prompt[0] !== 'number') {
    n *= prompt.length
  }

  let response = {}
  if (methodName === 'createCompletion') {
    response = constructCompletionResponseFromStreamedChunks(chunks, n)
  } else if (methodName === 'createChatCompletion') {
    response = constructChatCompletionResponseFromStreamedChunks(chunks, n)
  }

  // finish() in the instrumentation only merges into an existing ctx.result,
  // so the combined streamed body set here is preserved
  ctx.result = { data: response }
})
5191
}
5292

5393
configure (config) {

0 commit comments

Comments
 (0)