Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/gold-pillows-fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"roo-cline": patch
---

Streaming version of o3-mini
31 changes: 29 additions & 2 deletions src/api/providers/openai-native.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ export class OpenAiNativeHandler implements ApiHandler, SingleCompletionHandler
switch (modelId) {
case "o1":
case "o1-preview":
case "o1-mini":
case "o3-mini": {
case "o1-mini": {
// o1-preview and o1-mini don't support streaming, non-1 temp, or system prompt
// o1 doesn't support streaming or non-1 temp but does support a developer prompt
const response = await this.client.chat.completions.create({
Expand All @@ -49,6 +48,34 @@ export class OpenAiNativeHandler implements ApiHandler, SingleCompletionHandler
}
break
}
case "o3-mini": {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider extracting the repeated stream-handling logic (iterating over the chunks and yielding text and usage results) into a helper function. This will reduce redundancy and improve readability. See our Development Standards: https://www.notion.so/Development-Standards-59febcf8ead647fd9c2ec3f60c22f3df?pvs=4#11869ad2d5818094a05ef707e188c0d5

const stream = await this.client.chat.completions.create({
model: this.getModel().id,
messages: [{ role: "developer", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
stream: true,
stream_options: { include_usage: true },
})

for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta
if (delta?.content) {
yield {
type: "text",
text: delta.content,
}
}

// chunk.usage is null on every chunk except the last one, which carries the token usage statistics for the entire request
if (chunk.usage) {
yield {
type: "usage",
inputTokens: chunk.usage.prompt_tokens || 0,
outputTokens: chunk.usage.completion_tokens || 0,
}
}
}
break
}
default: {
const stream = await this.client.chat.completions.create({
model: this.getModel().id,
Expand Down
Loading