5 changes: 5 additions & 0 deletions .changeset/every-phones-joke.md
@@ -0,0 +1,5 @@
---
'@sap-ai-sdk/langchain': minor
---

[feat] Support disabling streaming completely via the langchain option `disableStreaming`.
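A minimal usage sketch of this option (assuming `AzureOpenAiChatClient` is exported from the package root; the model name and prompt are illustrative):

```ts
import { AzureOpenAiChatClient } from '@sap-ai-sdk/langchain';

// Opt out of streaming entirely: even frameworks such as LangGraph that
// transparently set `streaming` will receive a single, non-streamed completion.
const client = new AzureOpenAiChatClient({
  modelName: 'gpt-4o',
  disableStreaming: true
});

const result = await client.invoke('What is the capital of France?');
console.log(result.content);
```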
5 changes: 5 additions & 0 deletions .changeset/fresh-hotels-knock.md
@@ -0,0 +1,5 @@
---
'@sap-ai-sdk/langchain': minor
---

[feat] Support auto-streaming via the langchain option `streaming`. When enabled (e.g., transparently by LangGraph), responses are automatically streamed in `invoke()` calls.
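A sketch of the auto-streaming behavior described above (same import assumption; model name and prompt are illustrative):

```ts
import { AzureOpenAiChatClient } from '@sap-ai-sdk/langchain';

// With `streaming: true`, `invoke()` consumes `_streamResponseChunks()`
// internally and concatenates the chunks, so the caller still receives a
// single aggregated AIMessage.
const client = new AzureOpenAiChatClient({
  modelName: 'gpt-4o',
  streaming: true
});

const message = await client.invoke('What is the capital of France?');
console.log(message.content);
```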
3 changes: 3 additions & 0 deletions packages/langchain/package.json
@@ -39,5 +39,8 @@
"@sap-cloud-sdk/connectivity": "^4.3.0",
"@sap-cloud-sdk/util": "^4.3.0",
"uuid": "^13.0.0"
},
"devDependencies": {
"@langchain/langgraph": "^1.0.7"
}
}
1 change: 1 addition & 0 deletions packages/langchain/src/index.ts
@@ -14,6 +14,7 @@ export {
} from './orchestration/index.js';
export type {
OrchestrationCallOptions,
LangChainOrchestrationChatModelParams,
LangChainOrchestrationModuleConfig,
ChatOrchestrationToolType
} from './orchestration/index.js';
42 changes: 42 additions & 0 deletions packages/langchain/src/openai/__snapshots__/chat.test.ts.snap
@@ -1,5 +1,47 @@
// Jest Snapshot v1, https://jestjs.io/docs/snapshot-testing

exports[`Chat client streaming supports disabling auto-streaming via disableStreaming flag 1`] = `
{
"id": [
"langchain_core",
"messages",
"AIMessage",
],
"kwargs": {
"additional_kwargs": {
"function_call": undefined,
"tool_calls": undefined,
},
"content": "The capital of France is Paris.",
"invalid_tool_calls": [],
"response_metadata": {
"created": undefined,
"finish_reason": undefined,
"function_call": undefined,
"id": undefined,
"index": 0,
"model": undefined,
"object": undefined,
"promptFilterResults": undefined,
"tokenUsage": {
"completionTokens": 0,
"promptTokens": 0,
"totalTokens": 0,
},
"tool_calls": undefined,
},
"tool_calls": [],
"usage_metadata": {
"input_tokens": 0,
"output_tokens": 0,
"total_tokens": 0,
},
},
"lc": 1,
"type": "constructor",
}
`;

exports[`Chat client streaming supports streaming responses 1`] = `
{
"id": [
214 changes: 214 additions & 0 deletions packages/langchain/src/openai/chat.test.ts
@@ -3,6 +3,12 @@ import { apiVersion } from '@sap-ai-sdk/foundation-models/internal.js';
import { toJsonSchema } from '@langchain/core/utils/json_schema';
import { getSchemaDescription } from '@langchain/core/utils/types';
import { jest } from '@jest/globals';
import {
START,
END,
MessagesAnnotation,
StateGraph
} from '@langchain/langgraph';
import { addNumbersTool, joke } from '../../../../test-util/tools.js';
import {
mockClientCredentialsGrantCall,
@@ -332,6 +338,161 @@ describe('Chat client', () => {
expect(finalOutput).toMatchSnapshot();
});

it('supports auto-streaming responses', async () => {
mockInference(
{
data: {
messages: [
{
role: 'user' as const,
content: 'What is the capital of France?'
}
],
stream: true,
stream_options: {
include_usage: true
}
}
},
{
data: mockResponseStream,
status: 200
},
endpoint
);
jest.spyOn(AzureOpenAiChatClient.prototype, '_streamResponseChunks');

client.streaming = true;
expect(client.streaming).toBe(true);

const finalOutput = await client.invoke('What is the capital of France?');

expect(finalOutput).toBeDefined();
expect(client._streamResponseChunks).toHaveBeenCalled();
});

it('supports disabling auto-streaming via disableStreaming flag', async () => {
mockInference(
{
data: {
messages: [
{
role: 'user' as const,
content: 'What is the capital of France?'
}
]
}
},
{
data: {
choices: [
{
message: {
role: 'assistant',
content: 'The capital of France is Paris.'
},
index: 0
}
]
},
status: 200
},
endpoint
);
jest.spyOn(AzureOpenAiChatClient.prototype, '_streamResponseChunks');

client.streaming = false;
client.disableStreaming = true;

const finalOutput = await client.invoke('What is the capital of France?');

expect(finalOutput).toMatchSnapshot();
expect(client._streamResponseChunks).not.toHaveBeenCalled();
});

it('lets langchain handle disabling streaming via the disableStreaming flag in stream()', async () => {
mockInference(
{
data: {
messages: [
{
role: 'user' as const,
content: 'What is the capital of France?'
}
]
}
},
{
data: {
choices: [
{
message: {
role: 'assistant',
content: 'The capital of France is Paris.'
},
index: 0
}
]
},
status: 200
},
endpoint
);
jest.spyOn(AzureOpenAiChatClient.prototype, '_streamResponseChunks');

client.disableStreaming = true;
client.streaming = false;

const stream = await client.stream('What is the capital of France?');

// Non-streaming response, so only one chunk expected
const firstChunk = await stream.next();
expect(firstChunk.value).toBeDefined();
expect(firstChunk.done).toBe(false);
// Verify that no further chunks are present
const trailingChunk = await stream.next();
expect(trailingChunk.done).toBe(true);
expect(client._streamResponseChunks).not.toHaveBeenCalled();
});

it('should handle streaming and disabling streaming flags as expected', async () => {
let testClient = new AzureOpenAiChatClient({
modelName: 'gpt-4o',
streaming: true,
disableStreaming: true
});

// streaming should be disabled due to disableStreaming being true
expect(testClient.streaming).toBe(false);
expect(testClient.disableStreaming).toBe(true);

testClient = new AzureOpenAiChatClient({
modelName: 'gpt-4o',
streaming: false
});

// explicit streaming: false should disable streaming and set disableStreaming to true
expect(testClient.streaming).toBe(false);
expect(testClient.disableStreaming).toBe(true);

testClient = new AzureOpenAiChatClient({
modelName: 'gpt-4o',
streaming: true
});

// auto-streaming should be enabled
expect(testClient.streaming).toBe(true);
expect(testClient.disableStreaming).toBe(false);

testClient = new AzureOpenAiChatClient({
modelName: 'gpt-4o'
});

// auto-streaming and disable-streaming should be disabled by default
expect(testClient.streaming).toBe(false);
expect(testClient.disableStreaming).toBe(false);
});

it('streams and aborts with a signal', async () => {
mockInference(
{
@@ -406,5 +567,58 @@ describe('Chat client', () => {
});
expect(finalOutput).toMatchSnapshot();
});
it('streams when invoked in a streaming langgraph', async () => {
mockInference(
{
data: {
messages: [
{
role: 'user',
content: 'Hello!'
}
],
stream: true,
stream_options: {
include_usage: true
}
}
},
{
data: mockResponseStream,
status: 200
},
endpoint
);
jest.spyOn(AzureOpenAiChatClient.prototype, '_streamResponseChunks');
// Simulate a minimal streaming langgraph-like workflow
const llm = new AzureOpenAiChatClient({ modelName: 'gpt-4o' });

// Simulate a node function that calls the model using invoke
const callModel = async (state: { messages: any }) => {
const messages = await llm.invoke(state.messages);
return { messages };
};

// Define a new graph
const workflow = new StateGraph(MessagesAnnotation)
// Define the (single) node in the graph
.addNode('model', callModel)
.addEdge(START, 'model')
.addEdge('model', END);

const app = workflow.compile();
const stream = await app.stream(
{ messages: [{ role: 'user', content: 'Hello!' }] },
// LangGraph only enables streaming when a stream mode more granular than the default ('values') is used.
// messages: Streams 2-tuples (LLM token, metadata) from any graph nodes where an LLM is invoked.
// Stream modes: https://docs.langchain.com/oss/javascript/langgraph/streaming#supported-stream-modes
{ streamMode: 'messages' as const }
);

for await (const _ of stream) {
// Empty
}
expect(llm._streamResponseChunks).toHaveBeenCalled();
});
});
});
29 changes: 29 additions & 0 deletions packages/langchain/src/openai/chat.ts
@@ -55,6 +55,7 @@ export class AzureOpenAiChatClient extends BaseChatModel<AzureOpenAiChatCallOpti
max_tokens?: number;
supportsStrictToolCalling?: boolean;
modelName: string;
streaming: boolean = false;
private openAiChatClient: AzureOpenAiChatClientBase;

constructor(
@@ -72,6 +73,19 @@
this.presence_penalty = fields.presence_penalty;
this.frequency_penalty = fields.frequency_penalty;
this.max_tokens = fields.max_tokens;
// Initialize streaming flags with LangChain-compatible behavior:
// - `streaming: true` enables auto-streaming in `invoke()` calls
// - `disableStreaming: true` overrides the `streaming` flag
// - `streaming: false` causes `disableStreaming` to be set to `true` for framework compatibility
this.disableStreaming = fields?.disableStreaming === true;
// if streaming is explicitly false, streaming is disabled
if (fields?.streaming === false) {
this.disableStreaming = true;
}
// Enable auto-streaming only when `streaming` is explicitly `true` (default `false`) and streaming was not disabled above.
this.streaming =
fields?.streaming === true && this.disableStreaming !== true;

if (fields.supportsStrictToolCalling !== undefined) {
this.supportsStrictToolCalling = fields.supportsStrictToolCalling;
}
@@ -86,6 +100,21 @@ export class AzureOpenAiChatClient extends BaseChatModel<AzureOpenAiChatCallOpti
options: typeof this.ParsedCallOptions,
runManager?: CallbackManagerForLLMRun
): Promise<ChatResult> {
// Auto-streaming: if streaming is enabled, use _streamResponseChunks
// and concatenate chunks transparently for the caller
if (this.streaming) {
let generation;
const stream = this._streamResponseChunks(messages, options, runManager);
for await (const chunk of stream) {
generation =
generation === undefined ? chunk : generation.concat(chunk);
}
if (generation === undefined) {
throw new Error('No chunks were generated from the stream.');
}
return { generations: [generation] };
}

const res = await this.caller.callWithOptions(
{
signal: options.signal
6 changes: 6 additions & 0 deletions packages/langchain/src/openai/types.ts
@@ -35,6 +35,12 @@ export type AzureOpenAiChatModelParams = Pick<
* If `undefined` the `strict` argument will not be passed to OpenAI.
*/
supportsStrictToolCalling?: boolean;
/**
* Whether the model should automatically stream responses when using `invoke()`.
* If {@link disableStreaming} is set to `true`, this option will be ignored.
* If {@link streaming} is explicitly set to `false`, {@link disableStreaming} will be set to `true`.
*/
streaming?: boolean;
} & BaseChatModelParams &
ModelConfig<AzureOpenAiChatModel> &
ResourceGroupConfig;
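For reference, a sketch of how the two flags resolve at construction time, mirroring the expectations in `chat.test.ts` (the import path is assumed):

```ts
import { AzureOpenAiChatClient } from '@sap-ai-sdk/langchain';

// `disableStreaming: true` wins over `streaming: true`.
new AzureOpenAiChatClient({ modelName: 'gpt-4o', streaming: true, disableStreaming: true });
// -> streaming === false, disableStreaming === true

// An explicit `streaming: false` also sets `disableStreaming` to true.
new AzureOpenAiChatClient({ modelName: 'gpt-4o', streaming: false });
// -> streaming === false, disableStreaming === true

// Omitting both keeps auto-streaming off without forcing streaming off for frameworks.
new AzureOpenAiChatClient({ modelName: 'gpt-4o' });
// -> streaming === false, disableStreaming === false
```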