-
Notifications
You must be signed in to change notification settings - Fork 2.6k
fix: respect user-configured context window for Ollama models #7344
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -100,6 +100,103 @@ describe("NativeOllamaHandler", () => { | |
| expect(results.some((r) => r.type === "reasoning")).toBe(true) | ||
| expect(results.some((r) => r.type === "text")).toBe(true) | ||
| }) | ||
|
|
||
| describe("context window configuration", () => { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Excellent test coverage! These tests thoroughly verify both the custom context window usage and the fallback behavior. Consider adding an edge case test for invalid values (0 or negative) once we add schema validation. |
||
| it("should use custom context window when ollamaContextWindow is provided", async () => { | ||
| const customContextWindow = 48000 | ||
| const optionsWithCustomContext: ApiHandlerOptions = { | ||
| apiModelId: "llama2", | ||
| ollamaModelId: "llama2", | ||
| ollamaBaseUrl: "http://localhost:11434", | ||
| ollamaContextWindow: customContextWindow, | ||
| } | ||
| handler = new NativeOllamaHandler(optionsWithCustomContext) | ||
|
|
||
| // Mock the chat response | ||
| mockChat.mockImplementation(async function* () { | ||
| yield { | ||
| message: { content: "Test response" }, | ||
| eval_count: 10, | ||
| prompt_eval_count: 5, | ||
| } | ||
| }) | ||
|
|
||
| // Create a message to trigger the chat call | ||
| const generator = handler.createMessage("System prompt", [{ role: "user", content: "Test message" }]) | ||
|
|
||
| // Consume the generator | ||
| const results = [] | ||
| for await (const chunk of generator) { | ||
| results.push(chunk) | ||
| } | ||
|
|
||
| // Verify that chat was called with the custom context window | ||
| expect(mockChat).toHaveBeenCalledWith( | ||
| expect.objectContaining({ | ||
| options: expect.objectContaining({ | ||
| num_ctx: customContextWindow, | ||
| }), | ||
| }), | ||
| ) | ||
| }) | ||
|
|
||
| it("should use model's default context window when ollamaContextWindow is not provided", async () => { | ||
| // Mock the chat response | ||
| mockChat.mockImplementation(async function* () { | ||
| yield { | ||
| message: { content: "Test response" }, | ||
| eval_count: 10, | ||
| prompt_eval_count: 5, | ||
| } | ||
| }) | ||
|
|
||
| // Create a message to trigger the chat call | ||
| const generator = handler.createMessage("System prompt", [{ role: "user", content: "Test message" }]) | ||
|
|
||
| // Consume the generator | ||
| const results = [] | ||
| for await (const chunk of generator) { | ||
| results.push(chunk) | ||
| } | ||
|
|
||
| // Verify that chat was called with the model's default context window (4096) | ||
| expect(mockChat).toHaveBeenCalledWith( | ||
| expect.objectContaining({ | ||
| options: expect.objectContaining({ | ||
| num_ctx: 4096, | ||
| }), | ||
| }), | ||
| ) | ||
| }) | ||
|
|
||
| it("should use custom context window in completePrompt method", async () => { | ||
| const customContextWindow = 48000 | ||
| const optionsWithCustomContext: ApiHandlerOptions = { | ||
| apiModelId: "llama2", | ||
| ollamaModelId: "llama2", | ||
| ollamaBaseUrl: "http://localhost:11434", | ||
| ollamaContextWindow: customContextWindow, | ||
| } | ||
| handler = new NativeOllamaHandler(optionsWithCustomContext) | ||
|
|
||
| // Mock the chat response | ||
| mockChat.mockResolvedValue({ | ||
| message: { content: "Test response" }, | ||
| }) | ||
|
|
||
| // Call completePrompt | ||
| await handler.completePrompt("Test prompt") | ||
|
|
||
| // Verify that chat was called with the custom context window | ||
| expect(mockChat).toHaveBeenCalledWith( | ||
| expect.objectContaining({ | ||
| options: expect.objectContaining({ | ||
| num_ctx: customContextWindow, | ||
| }), | ||
| }), | ||
| ) | ||
| }) | ||
| }) | ||
| }) | ||
|
|
||
| describe("completePrompt", () => { | ||
|
|
@@ -115,6 +212,7 @@ describe("NativeOllamaHandler", () => { | |
| messages: [{ role: "user", content: "Tell me a joke" }], | ||
| stream: false, | ||
| options: { | ||
| num_ctx: 4096, | ||
| temperature: 0, | ||
| }, | ||
| }) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -181,7 +181,7 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio | |
| messages: ollamaMessages, | ||
| stream: true, | ||
| options: { | ||
| num_ctx: modelInfo.contextWindow, | ||
| num_ctx: this.options.ollamaContextWindow || modelInfo.contextWindow, | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice implementation of the fallback pattern! The `||` fallback to modelInfo.contextWindow is clean and follows the existing patterns in the codebase. |
||
| temperature: this.options.modelTemperature ?? (useR1Format ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0), | ||
| }, | ||
| }) | ||
|
|
@@ -262,14 +262,15 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio | |
| async completePrompt(prompt: string): Promise<string> { | ||
| try { | ||
| const client = this.ensureClient() | ||
| const { id: modelId } = await this.fetchModel() | ||
| const { id: modelId, info: modelInfo } = await this.fetchModel() | ||
| const useR1Format = modelId.toLowerCase().includes("deepseek-r1") | ||
|
|
||
| const response = await client.chat({ | ||
| model: modelId, | ||
| messages: [{ role: "user", content: prompt }], | ||
| stream: false, | ||
| options: { | ||
| num_ctx: this.options.ollamaContextWindow || modelInfo.contextWindow, | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good consistency - same pattern applied to the non-streaming method. Though I wonder if we should add more specific error handling here to distinguish between model fetch failures and context window configuration issues? |
||
| temperature: this.options.modelTemperature ?? (useR1Format ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0), | ||
| }, | ||
| }) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Should we add validation constraints here? Something like requiring a positive integer for the context window setting (the code example appears to have been omitted from this excerpt).
This would prevent invalid values like 0 or negative numbers from being set.