   BaseChatModel,
   type BaseChatModelParams,
 } from "@langchain/core/language_models/chat_models";
-import { AIMessage, type BaseMessage, type MessageContent } from "@langchain/core/messages";
-import { type ChatResult, ChatGeneration } from "@langchain/core/outputs";
+import { AIMessage, AIMessageChunk, type BaseMessage, type MessageContent } from "@langchain/core/messages";
+import { type ChatResult, ChatGeneration, ChatGenerationChunk } from "@langchain/core/outputs";
 import { type CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager";
 import { GitHubCopilotProvider } from "./GitHubCopilotProvider";
 import { extractTextFromChunk } from "@/utils";
@@ -13,6 +13,7 @@ const CHARS_PER_TOKEN = 4;
 
 export interface GitHubCopilotChatModelParams extends BaseChatModelParams {
   modelName: string;
+  streaming?: boolean;
 }
 
 /**
@@ -24,11 +25,13 @@ export class GitHubCopilotChatModel extends BaseChatModel {
 
   private provider: GitHubCopilotProvider;
   modelName: string;
+  streaming: boolean;
 
   constructor(fields: GitHubCopilotChatModelParams) {
     super(fields);
     this.provider = GitHubCopilotProvider.getInstance();
     this.modelName = fields.modelName;
+    this.streaming = fields.streaming ?? true;
   }
 
   _llmType(): string {
@@ -57,6 +60,16 @@ export class GitHubCopilotChatModel extends BaseChatModel {
     }
   }
 
+  /**
+   * Convert LangChain messages to Copilot API format.
+   */
+  private toCopilotMessages(messages: BaseMessage[]): Array<{ role: string; content: string }> {
+    return messages.map((m) => ({
+      role: this.convertMessageType(m._getType()),
+      content: extractTextFromChunk(m.content),
+    }));
+  }
+
   /**
    * Generate chat completion
    */
@@ -65,29 +78,146 @@ export class GitHubCopilotChatModel extends BaseChatModel {
     _options: this["ParsedCallOptions"],
     _runManager?: CallbackManagerForLLMRun
   ): Promise<ChatResult> {
-    // Convert LangChain messages to OpenAI format
-    const chatMessages = messages.map((m) => ({
-      role: this.convertMessageType(m._getType()),
-      content: extractTextFromChunk(m.content),
-    }));
+    const chatMessages = this.toCopilotMessages(messages);
 
     // Call Copilot API
     const response = await this.provider.sendChatMessage(chatMessages, this.modelName);
-    const content = response.choices?.[0]?.message?.content || "";
+    const choice = response.choices?.[0];
+    const content = choice?.message?.content || "";
+    const finishReason = choice?.finish_reason;
+
+    // Map token usage to the camelCase format expected by the project
+    const tokenUsage = response.usage
+      ? {
+          promptTokens: response.usage.prompt_tokens,
+          completionTokens: response.usage.completion_tokens,
+          totalTokens: response.usage.total_tokens,
+        }
+      : undefined;
+
+    // Build response_metadata for truncation detection and token usage extraction
+    const responseMetadata = {
+      finish_reason: finishReason,
+      tokenUsage,
+      model: response.model,
+    };
 
     const generation: ChatGeneration = {
       text: content,
-      message: new AIMessage(content),
+      message: new AIMessage({
+        content,
+        response_metadata: responseMetadata,
+      }),
+      generationInfo: { finish_reason: finishReason },
     };
 
     return {
       generations: [generation],
       llmOutput: {
-        tokenUsage: response.usage,
+        tokenUsage,
       },
     };
   }
 
+  /**
+   * Stream chat completion chunks.
+   * If streaming is disabled, yields a single chunk from _generate.
+   * If streaming fails, the error is propagated (no silent fallback).
+   */
+  override async *_streamResponseChunks(
+    messages: BaseMessage[],
+    options: this["ParsedCallOptions"],
+    runManager?: CallbackManagerForLLMRun
+  ): AsyncGenerator<ChatGenerationChunk> {
+    // If streaming is disabled, use _generate and yield the result as a single chunk
+    if (!this.streaming) {
+      const result = await this._generate(messages, options, runManager);
+      const generation = result.generations[0];
+      if (!generation) return;
+
+      const messageChunk = new AIMessageChunk({
+        content: generation.text,
+        response_metadata: generation.message.response_metadata,
+      });
+
+      const generationChunk = new ChatGenerationChunk({
+        message: messageChunk,
+        text: generation.text,
+        generationInfo: generation.generationInfo,
+      });
+
+      if (runManager && generation.text) {
+        await runManager.handleLLMNewToken(generation.text);
+      }
+
+      yield generationChunk;
+      return;
+    }
+
+    const chatMessages = this.toCopilotMessages(messages);
+    let didYieldChunk = false;
+
+    // Stream directly, with no fallback: errors are propagated to the caller
+    for await (const chunk of this.provider.sendChatMessageStream(
+      chatMessages,
+      this.modelName,
+      options?.signal
+    )) {
+      const choice = chunk.choices?.[0];
+      const content = choice?.delta?.content || "";
+
+      // Don't skip chunks that carry usage or finish_reason even if content is empty
+      const hasMetadata = choice?.finish_reason || chunk.usage || choice?.delta?.role;
+      if (!content && !hasMetadata) {
+        continue;
+      }
+
+      // Build response_metadata for the chunk
+      const responseMetadata: Record<string, unknown> = {};
+      if (choice?.finish_reason) {
+        responseMetadata.finish_reason = choice.finish_reason;
+      }
+      if (choice?.delta?.role) {
+        responseMetadata.role = choice.delta.role;
+      }
+      if (chunk.usage) {
+        responseMetadata.tokenUsage = {
+          promptTokens: chunk.usage.prompt_tokens,
+          completionTokens: chunk.usage.completion_tokens,
+          totalTokens: chunk.usage.total_tokens,
+        };
+      }
+      if (chunk.model) {
+        responseMetadata.model = chunk.model;
+      }
+
+      const messageChunk = new AIMessageChunk({
+        content,
+        response_metadata: Object.keys(responseMetadata).length > 0 ? responseMetadata : undefined,
+      });
+
+      const generationChunk = new ChatGenerationChunk({
+        message: messageChunk,
+        text: content,
+        generationInfo: choice?.finish_reason ? { finish_reason: choice.finish_reason } : undefined,
+      });
+
+      // Notify the run manager of the new token
+      if (runManager && content) {
+        await runManager.handleLLMNewToken(content);
+      }
+
+      didYieldChunk = true;
+      yield generationChunk;
+    }
+
+    // Detect silent failures where streaming completed but produced no chunks at all.
+    // Metadata-only streams are not treated as failures.
+    if (!didYieldChunk) {
+      throw new Error("GitHub Copilot streaming produced no chunks");
+    }
+  }
+
   /**
    * Simple token estimation based on character count
    */
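
For reviewers who want to exercise the new streaming path, here is a rough usage sketch. It is not part of the diff: the file path, the model name, and the assumption that the provider singleton is already authenticated are all guesses, while `.stream()` and `.invoke()` are the standard `@langchain/core` Runnable entry points that route into `_streamResponseChunks` and `_generate` respectively.

```ts
// Usage sketch only (not part of this PR). Assumes GitHubCopilotChatModel is
// exported from ./GitHubCopilotChatModel, the GitHubCopilotProvider singleton
// is already authenticated, and the model name below is a placeholder.
import { HumanMessage } from "@langchain/core/messages";
import { GitHubCopilotChatModel } from "./GitHubCopilotChatModel";

async function demo(): Promise<void> {
  // streaming defaults to true; pass { streaming: false } to take the
  // single-chunk branch that delegates to _generate.
  const model = new GitHubCopilotChatModel({ modelName: "gpt-4o" });

  // .stream() drives _streamResponseChunks under the hood.
  let text = "";
  const stream = await model.stream([new HumanMessage("Say hello in one sentence.")]);
  for await (const chunk of stream) {
    if (typeof chunk.content === "string") text += chunk.content;
  }
  console.log(text);

  // .invoke() uses _generate; finish_reason and tokenUsage land in response_metadata.
  const reply = await model.invoke([new HumanMessage("And again, non-streaming.")]);
  console.log(reply.response_metadata);
}

void demo();
```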