5 changes: 5 additions & 0 deletions .changeset/every-phones-joke.md
@@ -0,0 +1,5 @@
---
'@sap-ai-sdk/langchain': minor
---

[feat] Support disabling streaming completely via the langchain option `disableStreaming`.
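A minimal usage sketch of this option (assuming `AzureOpenAiChatClient` is exported from the package root; the model name and prompt are illustrative):

```ts
import { AzureOpenAiChatClient } from '@sap-ai-sdk/langchain';

// Opt out of streaming entirely: even frameworks such as LangGraph that
// transparently set `streaming` will receive a single, non-streamed completion.
const client = new AzureOpenAiChatClient({
  modelName: 'gpt-4o',
  disableStreaming: true
});

const result = await client.invoke('What is the capital of France?');
console.log(result.content);
```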
5 changes: 5 additions & 0 deletions .changeset/fresh-hotels-knock.md
@@ -0,0 +1,5 @@
---
'@sap-ai-sdk/langchain': minor
---

[feat] Support auto-streaming via the langchain option `streaming`. When enabled (e.g., transparently by LangGraph), responses are automatically streamed in `invoke()` calls.
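A sketch of the auto-streaming behavior described above (same import assumption; model name and prompt are illustrative):

```ts
import { AzureOpenAiChatClient } from '@sap-ai-sdk/langchain';

// With `streaming: true`, `invoke()` consumes `_streamResponseChunks()`
// internally and concatenates the chunks, so the caller still receives a
// single aggregated AIMessage.
const client = new AzureOpenAiChatClient({
  modelName: 'gpt-4o',
  streaming: true
});

const message = await client.invoke('What is the capital of France?');
console.log(message.content);
```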
3 changes: 3 additions & 0 deletions packages/langchain/package.json
@@ -39,5 +39,8 @@
"@sap-cloud-sdk/connectivity": "^4.3.0",
"@sap-cloud-sdk/util": "^4.3.0",
"uuid": "^13.0.0"
},
"devDependencies": {
"@langchain/langgraph": "^1.0.7"
}
}
1 change: 1 addition & 0 deletions packages/langchain/src/index.ts
@@ -14,6 +14,7 @@ export {
} from './orchestration/index.js';
export type {
OrchestrationCallOptions,
LangChainOrchestrationChatModelParams,
LangChainOrchestrationModuleConfig,
ChatOrchestrationToolType
} from './orchestration/index.js';
42 changes: 42 additions & 0 deletions packages/langchain/src/openai/__snapshots__/chat.test.ts.snap
@@ -1,5 +1,47 @@
// Jest Snapshot v1, https://jestjs.io/docs/snapshot-testing

exports[`Chat client streaming supports disabling auto-streaming via disableStreaming flag 1`] = `
{
"id": [
"langchain_core",
"messages",
"AIMessage",
],
"kwargs": {
"additional_kwargs": {
"function_call": undefined,
"tool_calls": undefined,
},
"content": "The capital of France is Paris.",
"invalid_tool_calls": [],
"response_metadata": {
"created": undefined,
"finish_reason": undefined,
"function_call": undefined,
"id": undefined,
"index": 0,
"model": undefined,
"object": undefined,
"promptFilterResults": undefined,
"tokenUsage": {
"completionTokens": 0,
"promptTokens": 0,
"totalTokens": 0,
},
"tool_calls": undefined,
},
"tool_calls": [],
"usage_metadata": {
"input_tokens": 0,
"output_tokens": 0,
"total_tokens": 0,
},
},
"lc": 1,
"type": "constructor",
}
`;

exports[`Chat client streaming supports streaming responses 1`] = `
{
"id": [
214 changes: 214 additions & 0 deletions packages/langchain/src/openai/chat.test.ts
@@ -3,6 +3,12 @@ import { apiVersion } from '@sap-ai-sdk/foundation-models/internal.js';
import { toJsonSchema } from '@langchain/core/utils/json_schema';
import { getSchemaDescription } from '@langchain/core/utils/types';
import { jest } from '@jest/globals';
import {
START,
END,
MessagesAnnotation,
StateGraph
} from '@langchain/langgraph';
import { addNumbersTool, joke } from '../../../../test-util/tools.js';
import {
mockClientCredentialsGrantCall,
@@ -332,6 +338,161 @@ describe('Chat client', () => {
expect(finalOutput).toMatchSnapshot();
});

it('supports auto-streaming responses', async () => {
mockInference(
{
data: {
messages: [
{
role: 'user' as const,
content: 'What is the capital of France?'
}
],
stream: true,
stream_options: {
include_usage: true
}
}
},
{
data: mockResponseStream,
status: 200
},
endpoint
);
jest.spyOn(AzureOpenAiChatClient.prototype, '_streamResponseChunks');

client.streaming = true;
expect(client.streaming).toBe(true);

const finalOutput = await client.invoke('What is the capital of France?');

expect(finalOutput).toBeDefined();
expect(client._streamResponseChunks).toHaveBeenCalled();
});

it('supports disabling auto-streaming via disableStreaming flag', async () => {
mockInference(
{
data: {
messages: [
{
role: 'user' as const,
content: 'What is the capital of France?'
}
]
}
},
{
data: {
choices: [
{
message: {
role: 'assistant',
content: 'The capital of France is Paris.'
},
index: 0
}
]
},
status: 200
},
endpoint
);
jest.spyOn(AzureOpenAiChatClient.prototype, '_streamResponseChunks');

client.streaming = false;
client.disableStreaming = true;

const finalOutput = await client.invoke('What is the capital of France?');

expect(finalOutput).toMatchSnapshot();
expect(client._streamResponseChunks).not.toHaveBeenCalled();
});

it('lets langchain handle disabling streaming via the disableStreaming flag in stream()', async () => {
mockInference(
{
data: {
messages: [
{
role: 'user' as const,
content: 'What is the capital of France?'
}
]
}
},
{
data: {
choices: [
{
message: {
role: 'assistant',
content: 'The capital of France is Paris.'
},
index: 0
}
]
},
status: 200
},
endpoint
);
jest.spyOn(AzureOpenAiChatClient.prototype, '_streamResponseChunks');

client.disableStreaming = true;
client.streaming = false;

const stream = await client.stream('What is the capital of France?');

// Non-streaming response, so only one chunk expected
const firstChunk = await stream.next();
expect(firstChunk.value).toBeDefined();
expect(firstChunk.done).toBe(false);
// Verify that no further chunks are present
const trailingChunk = await stream.next();
expect(trailingChunk.done).toBe(true);
expect(client._streamResponseChunks).not.toHaveBeenCalled();
});

it('should handle streaming and disabling streaming flags as expected', async () => {
let testClient = new AzureOpenAiChatClient({
modelName: 'gpt-4o',
streaming: true,
disableStreaming: true
});

// streaming should be disabled due to disableStreaming being true
expect(testClient.streaming).toBe(false);
expect(testClient.disableStreaming).toBe(true);

testClient = new AzureOpenAiChatClient({
modelName: 'gpt-4o',
streaming: false
});

// explicit streaming: false should disable streaming and set disableStreaming to true
expect(testClient.streaming).toBe(false);
expect(testClient.disableStreaming).toBe(true);

testClient = new AzureOpenAiChatClient({
modelName: 'gpt-4o',
streaming: true
});

// auto-streaming should be enabled
expect(testClient.streaming).toBe(true);
expect(testClient.disableStreaming).toBe(false);

testClient = new AzureOpenAiChatClient({
modelName: 'gpt-4o'
});

// auto-streaming and disable-streaming should be disabled by default
expect(testClient.streaming).toBe(false);
expect(testClient.disableStreaming).toBe(false);
});

it('streams and aborts with a signal', async () => {
mockInference(
{
@@ -406,5 +567,58 @@ describe('Chat client', () => {
});
expect(finalOutput).toMatchSnapshot();
});
it('streams when invoked in a streaming langgraph', async () => {
mockInference(
{
data: {
messages: [
{
role: 'user',
content: 'Hello!'
}
],
stream: true,
stream_options: {
include_usage: true
}
}
},
{
data: mockResponseStream,
status: 200
},
endpoint
);
jest.spyOn(AzureOpenAiChatClient.prototype, '_streamResponseChunks');
// Simulate a minimal streaming langgraph-like workflow
const llm = new AzureOpenAiChatClient({ modelName: 'gpt-4o' });

// Simulate a node function that calls the model using invoke
const callModel = async (state: { messages: any }) => {
const messages = await llm.invoke(state.messages);
return { messages };
};

// Define a new graph
const workflow = new StateGraph(MessagesAnnotation)
// Define the (single) node in the graph
.addNode('model', callModel)
.addEdge(START, 'model')
.addEdge('model', END);

const app = workflow.compile();
const stream = await app.stream(
{ messages: [{ role: 'user', content: 'Hello!' }] },
// LangGraph only enables streaming when a stream mode more granular than the default ('values') is used.
// messages: Streams 2-tuples (LLM token, metadata) from any graph nodes where an LLM is invoked.
// Stream modes: https://docs.langchain.com/oss/javascript/langgraph/streaming#supported-stream-modes
{ streamMode: 'messages' as const }
);

for await (const _ of stream) {
// Empty
}
expect(llm._streamResponseChunks).toHaveBeenCalled();
});
});
});
29 changes: 29 additions & 0 deletions packages/langchain/src/openai/chat.ts
@@ -55,6 +55,7 @@ export class AzureOpenAiChatClient extends BaseChatModel<AzureOpenAiChatCallOpti
max_tokens?: number;
supportsStrictToolCalling?: boolean;
modelName: string;
streaming: boolean = false;
private openAiChatClient: AzureOpenAiChatClientBase;

constructor(
@@ -72,6 +73,19 @@
this.presence_penalty = fields.presence_penalty;
this.frequency_penalty = fields.frequency_penalty;
this.max_tokens = fields.max_tokens;
// Initialize streaming flags with LangChain-compatible behavior:
// - `streaming: true` enables auto-streaming in `invoke()` calls
// - `disableStreaming: true` overrides the `streaming` flag
// - `streaming: false` causes `disableStreaming` to be set to `true` for framework compatibility
this.disableStreaming = fields?.disableStreaming === true;
// if streaming is explicitly false, streaming is disabled
if (fields?.streaming === false) {
this.disableStreaming = true;
}
// Enable auto-streaming only when `streaming` is explicitly `true` (default `false`) and streaming was not disabled above.
this.streaming =
fields?.streaming === true && this.disableStreaming !== true;

if (fields.supportsStrictToolCalling !== undefined) {
this.supportsStrictToolCalling = fields.supportsStrictToolCalling;
}
@@ -86,6 +100,21 @@ export class AzureOpenAiChatClient extends BaseChatModel<AzureOpenAiChatCallOpti
options: typeof this.ParsedCallOptions,
runManager?: CallbackManagerForLLMRun
): Promise<ChatResult> {
// Auto-streaming: if streaming is enabled, use _streamResponseChunks
// and concatenate chunks transparently for the caller
if (this.streaming) {
let generation;
const stream = this._streamResponseChunks(messages, options, runManager);
for await (const chunk of stream) {
generation =
generation === undefined ? chunk : generation.concat(chunk);
}
if (generation === undefined) {
throw new Error('No chunks were generated from the stream.');
}
return { generations: [generation] };
}

const res = await this.caller.callWithOptions(
{
signal: options.signal
6 changes: 6 additions & 0 deletions packages/langchain/src/openai/types.ts
@@ -35,6 +35,12 @@ export type AzureOpenAiChatModelParams = Pick<
* If `undefined` the `strict` argument will not be passed to OpenAI.
*/
supportsStrictToolCalling?: boolean;
/**
* Whether the model should automatically stream responses when using `invoke()`.
* If {@link disableStreaming} is set to `true`, this option will be ignored.
* If {@link streaming} is explicitly set to `false`, {@link disableStreaming} will be set to `true`.
*/
streaming?: boolean;
} & BaseChatModelParams &
ModelConfig<AzureOpenAiChatModel> &
ResourceGroupConfig;
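For reference, a sketch of how the two flags resolve at construction time, mirroring the expectations in `chat.test.ts` (the import path is assumed):

```ts
import { AzureOpenAiChatClient } from '@sap-ai-sdk/langchain';

// `disableStreaming: true` wins over `streaming: true`.
new AzureOpenAiChatClient({ modelName: 'gpt-4o', streaming: true, disableStreaming: true });
// -> streaming === false, disableStreaming === true

// An explicit `streaming: false` also sets `disableStreaming` to true.
new AzureOpenAiChatClient({ modelName: 'gpt-4o', streaming: false });
// -> streaming === false, disableStreaming === true

// Omitting both keeps auto-streaming off without forcing streaming off for frameworks.
new AzureOpenAiChatClient({ modelName: 'gpt-4o' });
// -> streaming === false, disableStreaming === false
```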