@@ -31,6 +31,9 @@ export type OpenAIRequest = {
   top_p?: number;
   n?: number;
   stream?: boolean;
+  stream_options: {
+    include_usage: boolean;
+  };
   stop?: string | string[];
   max_tokens?: number;
   presence_penalty?: number;
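For context on the new `stream_options` field (a sketch, not part of the diff): per the OpenAI Chat Completions API, requesting `include_usage: true` makes the server emit one extra chunk at the end of the stream with an empty `choices` array and a populated `usage` object, just before the literal `[DONE]` sentinel. Trimmed-down local types stand in for the module's exports:

```ts
// Local stand-ins, for illustration only.
type Usage = { prompt_tokens: number; completion_tokens: number; total_tokens: number };
type StreamChunk = { choices: unknown[]; usage?: Usage | null };

// The usage-bearing final chunk: empty `choices`, populated `usage`.
// After this chunk the server sends the literal `data: [DONE]` line.
const finalChunk: StreamChunk = {
  choices: [],
  usage: { prompt_tokens: 12, completion_tokens: 34, total_tokens: 46 },
};
console.log(finalChunk.usage?.total_tokens); // 46
```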
@@ -69,7 +72,7 @@ export type OpenAIResponseUsage = {
 
 export type OpenAIResponseChoice = {
   index: number;
-  message: {
+  message?: {
     role: "assistant" | "user" | "system" | "tool";
     content: string | null;
     function_call?: {
@@ -85,6 +88,9 @@ export type OpenAIResponseChoice = {
       };
     }[];
   };
+  delta?: {
+    content: string | null;
+  };
   finish_reason: "stop" | "length" | "tool_calls" | "content_filter" | null;
 };
 
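A sketch of why `message` becomes optional once `delta` exists (local stand-in types, illustrative values): non-streaming responses carry `message`, streaming chunks carry `delta`, and neither field is guaranteed on every payload.

```ts
// Trimmed-down stand-in for OpenAIResponseChoice.
type Choice = {
  index: number;
  message?: { content: string | null }; // set on non-streaming responses
  delta?: { content: string | null };   // set on streaming chunks
  finish_reason: string | null;
};

// Completed (non-streaming) response choice:
const completed: Choice = { index: 0, message: { content: "Hello!" }, finish_reason: "stop" };
// Streaming chunk choice: a partial token and no finish reason yet.
const chunk: Choice = { index: 0, delta: { content: "Hel" }, finish_reason: null };
console.log(completed.message?.content, chunk.delta?.content);
```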
@@ -98,7 +104,10 @@ export type OpenAIResponse = {
   usage?: OpenAIResponseUsage;
 };
 
-export type OpenAICompatibleInput = Omit<OpenAIRequest, "stream" | "model">;
+export type OpenAICompatibleInput = Omit<
+  OpenAIRequest,
+  "stream" | "stream_options" | "model"
+>;
 
 export type OpenAIProviderInput = ILLMProviderInput<OpenAICompatibleInput>;
 export type OpenAIProviderOutput = ILLMProviderOutput<OpenAIResponse>;
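What the widened `Omit` enforces, sketched with a local stand-in type: callers supply only prompt-level fields, while `model`, `stream`, and now `stream_options` stay under the session's control.

```ts
// Local stand-in for OpenAIRequest, illustration only.
type Request = {
  model: string;
  stream?: boolean;
  stream_options?: { include_usage: boolean };
  temperature?: number;
};
type CompatibleInput = Omit<Request, "stream" | "stream_options" | "model">;

const ok: CompatibleInput = { temperature: 0.2 };
// @ts-expect-error transport-level fields are no longer accepted from callers
const rejected: CompatibleInput = { stream_options: { include_usage: true } };
```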
@@ -126,23 +135,25 @@ export class OpenAILLMSession implements ILLMProvider, ILLMProviderMeta {
     const parser = this.parse;
     const stream = async function* () {
       for await (const message of generator) {
+        // NOTE:(kallebysantos) while streaming, the final message will not include 'finish_reason';
+        // instead a '[DONE]' value is returned to close the stream.
+        if ("done" in message && message.done) {
+          return;
+        }
+
         if ("error" in message) {
           if (message.error instanceof Error) {
             throw message.error;
-          } else {
-            throw new Error(message.error as string);
           }
+
+          throw new Error(message.error as string);
         }
 
         yield parser(message);
-        const finishReason = message.choices[0].finish_reason;
 
-        if (finishReason) {
-          if (finishReason !== "stop") {
-            throw new Error("Expected a completed response.");
-          }
-
-          return;
+        const finish_reason = message.choices.at(0)?.finish_reason;
+        if (finish_reason && finish_reason !== "stop") {
+          throw new Error("Expected a completed response.");
         }
       }
     };
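A standalone sketch of the generator's new control flow, under assumed chunk shapes: `{ done: true }` (the parsed `[DONE]` sentinel) ends iteration normally, errors are rethrown, and only a non-`"stop"` finish reason aborts the stream.

```ts
// Assumed shapes; the real session yields parsed SSE messages.
type Chunk =
  | { done: true }
  | { error: unknown }
  | { choices: { finish_reason: string | null }[] };

async function* drain(source: AsyncIterable<Chunk>) {
  for await (const message of source) {
    // '[DONE]' terminates the stream without a final 'finish_reason'.
    if ("done" in message && message.done) return;
    if ("error" in message) {
      if (message.error instanceof Error) throw message.error;
      throw new Error(String(message.error));
    }
    yield message;
    const finish_reason = message.choices.at(0)?.finish_reason;
    if (finish_reason && finish_reason !== "stop") {
      throw new Error("Expected a completed response.");
    }
  }
}
```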
@@ -172,12 +183,14 @@ export class OpenAILLMSession implements ILLMProvider, ILLMProviderMeta {
     return this.parse(response);
   }
 
-  private parse(message: OpenAIResponse): OpenAIProviderOutput {
-    const { usage } = message;
+  private parse(response: OpenAIResponse): OpenAIProviderOutput {
+    const { usage } = response;
+    const choice = response.choices.at(0);
 
     return {
-      value: message.choices.at(0)?.message.content ?? undefined,
-      inner: message,
+      // NOTE:(kallebysantos) while streaming, the 'delta' field is used instead of 'message'
+      value: choice?.message?.content ?? choice?.delta?.content ?? undefined,
+      inner: response,
       usage: {
         // NOTE:(kallebysantos) usage may be 'null' while streaming, but the final message will include it
         inputTokens: usage?.prompt_tokens ?? 0,
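The resolution order in `parse`, restated as a tiny runnable sketch (local stand-in types): a completed response resolves from `message`, a streaming chunk falls back to `delta`, and the usage-only final chunk yields `undefined`.

```ts
type Choice = { message?: { content: string | null }; delta?: { content: string | null } };
type Response = { choices: Choice[] };

function extractValue(response: Response): string | undefined {
  const choice = response.choices.at(0);
  return choice?.message?.content ?? choice?.delta?.content ?? undefined;
}

console.log(extractValue({ choices: [{ message: { content: "full answer" } }] })); // "full answer"
console.log(extractValue({ choices: [{ delta: { content: "tok" } }] }));           // "tok"
console.log(extractValue({ choices: [] }));                                        // undefined
```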
@@ -204,6 +217,9 @@ export class OpenAILLMSession implements ILLMProvider, ILLMProviderMeta {
           ...input,
           model: this.options.model,
           stream,
+          stream_options: {
+            include_usage: true,
+          },
         } satisfies OpenAIRequest,
       ),
       signal,
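On the wire, the built body for a streaming call would look roughly like this (a sketch; the `messages` value and model name are illustrative, not from the diff):

```ts
const body = JSON.stringify({
  messages: [{ role: "user", content: "Hello" }], // caller-supplied input
  model: "gpt-4o-mini",                           // from this.options.model (assumed value)
  stream: true,
  stream_options: { include_usage: true },        // attached so the final chunk reports usage
});
console.log(body);
```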