
Commit 5bfb6fb

fix: GitHub Copilot add streaming support and improve robustness (#2113)
- Add streaming response with SSE parsing (eventsource-parser)
- Add Accept: text/event-stream header and content-type validation
- Add empty output detection to prevent silent failures
- Unify token expiry logic between getAuthState() and getValidCopilotToken()
- Add proper resource cleanup with reader.cancel()
- Add request cancellation handling in GitHubCopilotAuth component
- Extract HTTP_STATUS_MESSAGES constant
- Remove fallback mechanism for simpler error handling
1 parent 957b62a commit 5bfb6fb
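
The diff below covers only GitHubCopilotChatModel.ts; the provider-side streaming described in the commit message (SSE parsing via eventsource-parser, the Accept: text/event-stream header, content-type validation, and reader.cancel() cleanup) lives in one of the other changed files, which is not shown here. For orientation only, the following standalone TypeScript sketch shows how such a stream could be parsed. It is not the commit's actual implementation: the helper name, its parameters, and the use of eventsource-parser v3's createParser({ onEvent }) API are assumptions.

import { createParser } from "eventsource-parser";

// Shape of a streamed chunk as consumed by the chat model diff below
// (choices[].delta, finish_reason, usage, model).
interface CopilotStreamChunk {
  model?: string;
  usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
  choices?: Array<{
    delta?: { role?: string; content?: string };
    finish_reason?: string;
  }>;
}

// Hypothetical standalone helper; the real provider method is
// sendChatMessageStream(messages, modelName, signal) per the diff below.
async function* streamCopilotSse(
  url: string,
  token: string,
  body: unknown,
  signal?: AbortSignal
): AsyncGenerator<CopilotStreamChunk> {
  const response = await fetch(url, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${token}`,
      "Content-Type": "application/json",
      Accept: "text/event-stream", // header added by the commit
    },
    body: JSON.stringify(body),
    signal,
  });

  // Content-type validation mentioned in the commit message.
  const contentType = response.headers.get("content-type") ?? "";
  if (!response.ok || !contentType.includes("text/event-stream")) {
    throw new Error(`Unexpected response: ${response.status} (${contentType})`);
  }
  if (!response.body) {
    throw new Error("Response has no body");
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  const pending: CopilotStreamChunk[] = [];
  let finished = false;

  // eventsource-parser v3 API (assumption); events are queued because
  // the onEvent callback cannot yield from the surrounding generator.
  const parser = createParser({
    onEvent(event) {
      if (event.data === "[DONE]") {
        finished = true;
        return;
      }
      pending.push(JSON.parse(event.data) as CopilotStreamChunk);
    },
  });

  try {
    while (!finished) {
      const { value, done } = await reader.read();
      if (done || !value) break;
      parser.feed(decoder.decode(value, { stream: true }));
      while (pending.length > 0) {
        yield pending.shift() as CopilotStreamChunk;
      }
    }
  } finally {
    // Resource cleanup with reader.cancel(), per the commit message.
    await reader.cancel().catch(() => {});
  }
}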

File tree: 3 files changed (+697, -117 lines)


src/LLMProviders/githubCopilot/GitHubCopilotChatModel.ts

Lines changed: 140 additions & 10 deletions
@@ -2,8 +2,8 @@ import {
   BaseChatModel,
   type BaseChatModelParams,
 } from "@langchain/core/language_models/chat_models";
-import { AIMessage, type BaseMessage, type MessageContent } from "@langchain/core/messages";
-import { type ChatResult, ChatGeneration } from "@langchain/core/outputs";
+import { AIMessage, AIMessageChunk, type BaseMessage, type MessageContent } from "@langchain/core/messages";
+import { type ChatResult, ChatGeneration, ChatGenerationChunk } from "@langchain/core/outputs";
 import { type CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager";
 import { GitHubCopilotProvider } from "./GitHubCopilotProvider";
 import { extractTextFromChunk } from "@/utils";
@@ -13,6 +13,7 @@ const CHARS_PER_TOKEN = 4;
 
 export interface GitHubCopilotChatModelParams extends BaseChatModelParams {
   modelName: string;
+  streaming?: boolean;
 }
 
 /**
@@ -24,11 +25,13 @@ export class GitHubCopilotChatModel extends BaseChatModel {
 
   private provider: GitHubCopilotProvider;
   modelName: string;
+  streaming: boolean;
 
   constructor(fields: GitHubCopilotChatModelParams) {
     super(fields);
     this.provider = GitHubCopilotProvider.getInstance();
     this.modelName = fields.modelName;
+    this.streaming = fields.streaming ?? true;
   }
 
   _llmType(): string {
@@ -57,6 +60,16 @@ export class GitHubCopilotChatModel extends BaseChatModel {
     }
   }
 
+  /**
+   * Convert LangChain messages to Copilot API format.
+   */
+  private toCopilotMessages(messages: BaseMessage[]): Array<{ role: string; content: string }> {
+    return messages.map((m) => ({
+      role: this.convertMessageType(m._getType()),
+      content: extractTextFromChunk(m.content),
+    }));
+  }
+
   /**
    * Generate chat completion
    */
@@ -65,29 +78,146 @@ export class GitHubCopilotChatModel extends BaseChatModel {
     _options: this["ParsedCallOptions"],
     _runManager?: CallbackManagerForLLMRun
   ): Promise<ChatResult> {
-    // Convert LangChain messages to OpenAI format
-    const chatMessages = messages.map((m) => ({
-      role: this.convertMessageType(m._getType()),
-      content: extractTextFromChunk(m.content),
-    }));
+    const chatMessages = this.toCopilotMessages(messages);
 
     // Call Copilot API
     const response = await this.provider.sendChatMessage(chatMessages, this.modelName);
-    const content = response.choices?.[0]?.message?.content || "";
+    const choice = response.choices?.[0];
+    const content = choice?.message?.content || "";
+    const finishReason = choice?.finish_reason;
+
+    // Map token usage to camelCase format expected by the project
+    const tokenUsage = response.usage
+      ? {
+          promptTokens: response.usage.prompt_tokens,
+          completionTokens: response.usage.completion_tokens,
+          totalTokens: response.usage.total_tokens,
+        }
+      : undefined;
+
+    // Build response_metadata for truncation detection and token usage extraction
+    const responseMetadata = {
+      finish_reason: finishReason,
+      tokenUsage,
+      model: response.model,
+    };
 
     const generation: ChatGeneration = {
       text: content,
-      message: new AIMessage(content),
+      message: new AIMessage({
+        content,
+        response_metadata: responseMetadata,
+      }),
+      generationInfo: { finish_reason: finishReason },
     };
 
     return {
       generations: [generation],
       llmOutput: {
-        tokenUsage: response.usage,
+        tokenUsage,
       },
     };
   }
 
+  /**
+   * Stream chat completion chunks.
+   * If streaming is disabled, yields a single chunk from _generate.
+   * If streaming fails, the error is propagated (no silent fallback).
+   */
+  override async *_streamResponseChunks(
+    messages: BaseMessage[],
+    options: this["ParsedCallOptions"],
+    runManager?: CallbackManagerForLLMRun
+  ): AsyncGenerator<ChatGenerationChunk> {
+    // If streaming is disabled, use _generate and yield as single chunk
+    if (!this.streaming) {
+      const result = await this._generate(messages, options, runManager);
+      const generation = result.generations[0];
+      if (!generation) return;
+
+      const messageChunk = new AIMessageChunk({
+        content: generation.text,
+        response_metadata: generation.message.response_metadata,
+      });
+
+      const generationChunk = new ChatGenerationChunk({
+        message: messageChunk,
+        text: generation.text,
+        generationInfo: generation.generationInfo,
+      });
+
+      if (runManager && generation.text) {
+        await runManager.handleLLMNewToken(generation.text);
+      }
+
+      yield generationChunk;
+      return;
+    }
+
+    const chatMessages = this.toCopilotMessages(messages);
+    let didYieldChunk = false;
+
+    // Stream directly, no fallback - errors are propagated to caller
+    for await (const chunk of this.provider.sendChatMessageStream(
+      chatMessages,
+      this.modelName,
+      options?.signal
+    )) {
+      const choice = chunk.choices?.[0];
+      const content = choice?.delta?.content || "";
+
+      // Don't skip chunks with usage or finish_reason even if content is empty
+      const hasMetadata = choice?.finish_reason || chunk.usage || choice?.delta?.role;
+      if (!content && !hasMetadata) {
+        continue;
+      }
+
+      // Build response_metadata for the chunk
+      const responseMetadata: Record<string, unknown> = {};
+      if (choice?.finish_reason) {
+        responseMetadata.finish_reason = choice.finish_reason;
+      }
+      if (choice?.delta?.role) {
+        responseMetadata.role = choice.delta.role;
+      }
+      if (chunk.usage) {
+        responseMetadata.tokenUsage = {
+          promptTokens: chunk.usage.prompt_tokens,
+          completionTokens: chunk.usage.completion_tokens,
+          totalTokens: chunk.usage.total_tokens,
+        };
+      }
+      if (chunk.model) {
+        responseMetadata.model = chunk.model;
+      }
+
+      const messageChunk = new AIMessageChunk({
+        content,
+        response_metadata: Object.keys(responseMetadata).length > 0 ? responseMetadata : undefined,
+      });
+
+      const generationChunk = new ChatGenerationChunk({
+        message: messageChunk,
+        text: content,
+        generationInfo: choice?.finish_reason ? { finish_reason: choice.finish_reason } : undefined,
+      });
+
+      // Notify run manager of new token
+      if (runManager && content) {
+        await runManager.handleLLMNewToken(content);
+      }
+
+      didYieldChunk = true;
+      yield generationChunk;
+    }
+
+    // Detect silent failures where streaming completed but produced no chunks at all.
+    // Avoid treating metadata-only streams as failures.
+    if (!didYieldChunk) {
+      throw new Error("GitHub Copilot streaming produced no chunks");
+    }
+  }
+
   /**
    * Simple token estimation based on character count
    */
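
For orientation, a minimal usage sketch of the streaming path added above. Assumptions: GitHubCopilotChatModel is exported from this module, the provider singleton is already authenticated, and "gpt-4o" stands in for whatever model names Copilot actually exposes. LangChain's standard stream() call is what drives the _streamResponseChunks override.

import { HumanMessage } from "@langchain/core/messages";
import { GitHubCopilotChatModel } from "./GitHubCopilotChatModel";

async function demo(): Promise<void> {
  // streaming defaults to true per the constructor change above.
  const model = new GitHubCopilotChatModel({ modelName: "gpt-4o" });

  // stream() yields AIMessageChunk values produced by _streamResponseChunks.
  const stream = await model.stream([new HumanMessage("Explain SSE in one sentence.")]);
  for await (const chunk of stream) {
    if (typeof chunk.content === "string") {
      process.stdout.write(chunk.content);
    }
  }
}

demo().catch(console.error);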
