|
2 | 2 |
|
3 | 3 | import { Anthropic } from "@anthropic-ai/sdk" |
4 | 4 | import { AnthropicVertex } from "@anthropic-ai/vertex-sdk" |
| 5 | +import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta" |
5 | 6 |
|
6 | 7 | import { VertexHandler } from "../vertex" |
7 | 8 | import { ApiStreamChunk } from "../../transform/stream" |
@@ -431,6 +432,138 @@ describe("VertexHandler", () => { |
431 | 432 | }) |
432 | 433 | }) |
433 | 434 |
|
| 435 | + describe("thinking functionality", () => { // Streaming tests: "thinking" events from the mocked client must surface as "reasoning" chunks.
| 436 | + const mockMessages: Anthropic.Messages.MessageParam[] = [
| 437 | + {
| 438 | + role: "user",
| 439 | + content: "Hello",
| 440 | + },
| 441 | + ]
| 442 | + // Conversation and system prompt shared by both tests in this suite.
| 443 | + const systemPrompt = "You are a helpful assistant"
| 444 | + // Mixed stream: a thinking block, a thinking delta, then a text block at index 1.
| 445 | + it("should handle thinking content blocks and deltas", async () => {
| 446 | + const mockStream = [
| 447 | + {
| 448 | + type: "message_start",
| 449 | + message: {
| 450 | + usage: {
| 451 | + input_tokens: 10,
| 452 | + output_tokens: 0,
| 453 | + },
| 454 | + },
| 455 | + },
| 456 | + {
| 457 | + type: "content_block_start",
| 458 | + index: 0,
| 459 | + content_block: {
| 460 | + type: "thinking",
| 461 | + thinking: "Let me think about this...",
| 462 | + },
| 463 | + },
| 464 | + {
| 465 | + type: "content_block_delta",
| 466 | + delta: {
| 467 | + type: "thinking_delta",
| 468 | + thinking: " I need to consider all options.",
| 469 | + },
| 470 | + },
| 471 | + {
| 472 | + type: "content_block_start",
| 473 | + index: 1,
| 474 | + content_block: {
| 475 | + type: "text",
| 476 | + text: "Here's my answer:",
| 477 | + },
| 478 | + },
| 479 | + ]
| 480 | +
| 481 | + // Wrap the canned events in an async iterable that yields them one by one, in order
| 482 | + const asyncIterator = {
| 483 | + async *[Symbol.asyncIterator]() {
| 484 | + for (const chunk of mockStream) {
| 485 | + yield chunk
| 486 | + }
| 487 | + },
| 488 | + }
| 489 | + // Swap the client's `messages.create` for a mock resolving to the canned stream (`handler` is declared outside this suite).
| 490 | + const mockCreate = jest.fn().mockResolvedValue(asyncIterator)
| 491 | + ;(handler["client"].messages as any).create = mockCreate
| 492 | + // Drain everything the handler emits into `chunks` for inspection.
| 493 | + const stream = handler.createMessage(systemPrompt, mockMessages)
| 494 | + const chunks: ApiStreamChunk[] = []
| 495 | +
| 496 | + for await (const chunk of stream) {
| 497 | + chunks.push(chunk)
| 498 | + }
| 499 | +
| 500 | + // Verify thinking content is processed correctly: block start and delta each yield one reasoning chunk
| 501 | + const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
| 502 | + expect(reasoningChunks).toHaveLength(2)
| 503 | + expect(reasoningChunks[0].text).toBe("Let me think about this...")
| 504 | + expect(reasoningChunks[1].text).toBe(" I need to consider all options.")
| 505 | + // NOTE(review): the leading "\n" text chunk is presumably emitted because the text block has index 1 — confirm against the handler.
| 506 | + // Verify text content is processed correctly
| 507 | + const textChunks = chunks.filter((chunk) => chunk.type === "text")
| 508 | + expect(textChunks).toHaveLength(2) // One for the newline separator, then one for the text block
| 509 | + expect(textChunks[0].text).toBe("\n")
| 510 | + expect(textChunks[1].text).toBe("Here's my answer:")
| 511 | + })
| 512 | + // Two consecutive thinking blocks: a "\n" reasoning chunk is expected between them.
| 513 | + it("should handle multiple thinking blocks with line breaks", async () => {
| 514 | + const mockStream = [
| 515 | + {
| 516 | + type: "content_block_start",
| 517 | + index: 0,
| 518 | + content_block: {
| 519 | + type: "thinking",
| 520 | + thinking: "First thinking block",
| 521 | + },
| 522 | + },
| 523 | + {
| 524 | + type: "content_block_start",
| 525 | + index: 1,
| 526 | + content_block: {
| 527 | + type: "thinking",
| 528 | + thinking: "Second thinking block",
| 529 | + },
| 530 | + },
| 531 | + ]
| 532 | + // Async iterable that yields the two canned events in order.
| 533 | + const asyncIterator = {
| 534 | + async *[Symbol.asyncIterator]() {
| 535 | + for (const chunk of mockStream) {
| 536 | + yield chunk
| 537 | + }
| 538 | + },
| 539 | + }
| 540 | + // Swap the client's `messages.create` for a mock resolving to the canned stream.
| 541 | + const mockCreate = jest.fn().mockResolvedValue(asyncIterator)
| 542 | + ;(handler["client"].messages as any).create = mockCreate
| 543 | + // Drain everything the handler emits into `chunks`.
| 544 | + const stream = handler.createMessage(systemPrompt, mockMessages)
| 545 | + const chunks: ApiStreamChunk[] = []
| 546 | +
| 547 | + for await (const chunk of stream) {
| 548 | + chunks.push(chunk)
| 549 | + }
| 550 | + // Exactly three chunks, in order: first block, newline separator, second block.
| 551 | + expect(chunks.length).toBe(3)
| 552 | + expect(chunks[0]).toEqual({
| 553 | + type: "reasoning",
| 554 | + text: "First thinking block",
| 555 | + })
| 556 | + expect(chunks[1]).toEqual({
| 557 | + type: "reasoning",
| 558 | + text: "\n",
| 559 | + })
| 560 | + expect(chunks[2]).toEqual({
| 561 | + type: "reasoning",
| 562 | + text: "Second thinking block",
| 563 | + })
| 564 | + })
| 565 | + })
| 566 | + |
434 | 567 | describe("completePrompt", () => { |
435 | 568 | it("should complete prompt successfully", async () => { |
436 | 569 | const result = await handler.completePrompt("Test prompt") |
@@ -500,4 +633,121 @@ describe("VertexHandler", () => { |
500 | 633 | expect(modelInfo.id).toBe("claude-3-7-sonnet@20250219") // Default model |
501 | 634 | }) |
502 | 635 | }) |
| 636 | + |
| 637 | + describe("thinking model configuration", () => { // Config tests: how a ":thinking" model id and the budget options shape getModel() and the API request.
| 638 | + it("should configure thinking for models with :thinking suffix", () => {
| 639 | + const thinkingHandler = new VertexHandler({
| 640 | + apiModelId: "claude-3-7-sonnet@20250219:thinking",
| 641 | + vertexProjectId: "test-project",
| 642 | + vertexRegion: "us-central1",
| 643 | + modelMaxTokens: 16384,
| 644 | + vertexThinking: 4096,
| 645 | + })
| 646 | + // getModel() is expected to drop the ":thinking" suffix from the id and expose the thinking config.
| 647 | + const modelInfo = thinkingHandler.getModel()
| 648 | +
| 649 | + // Verify thinking configuration
| 650 | + expect(modelInfo.id).toBe("claude-3-7-sonnet@20250219")
| 651 | + expect(modelInfo.thinking).toBeDefined()
| 652 | + const thinkingConfig = modelInfo.thinking as { type: "enabled"; budget_tokens: number }
| 653 | + expect(thinkingConfig.type).toBe("enabled")
| 654 | + expect(thinkingConfig.budget_tokens).toBe(4096)
| 655 | + expect(modelInfo.temperature).toBe(1.0) // Thinking requires temperature 1.0
| 656 | + })
| 657 | + // Three cases: explicit budget, default of 80% of modelMaxTokens, and the 1024-token floor.
| 658 | + it("should calculate thinking budget correctly", () => {
| 659 | + // Test with explicit thinking budget
| 660 | + const handlerWithBudget = new VertexHandler({
| 661 | + apiModelId: "claude-3-7-sonnet@20250219:thinking",
| 662 | + vertexProjectId: "test-project",
| 663 | + vertexRegion: "us-central1",
| 664 | + modelMaxTokens: 16384,
| 665 | + vertexThinking: 5000,
| 666 | + })
| 667 | + // toMatchObject avoids an `as any` cast and fails as an assertion (not a TypeError) if `thinking` is missing.
| 668 | + expect(handlerWithBudget.getModel().thinking).toMatchObject({ budget_tokens: 5000 })
| 669 | +
| 670 | + // Test with default thinking budget (80% of max tokens)
| 671 | + const handlerWithDefaultBudget = new VertexHandler({
| 672 | + apiModelId: "claude-3-7-sonnet@20250219:thinking",
| 673 | + vertexProjectId: "test-project",
| 674 | + vertexRegion: "us-central1",
| 675 | + modelMaxTokens: 10000,
| 676 | + })
| 677 | +
| 678 | + expect(handlerWithDefaultBudget.getModel().thinking).toMatchObject({ budget_tokens: 8000 }) // 80% of 10000
| 679 | +
| 680 | + // Test with minimum thinking budget (should be at least 1024)
| 681 | + const handlerWithSmallMaxTokens = new VertexHandler({
| 682 | + apiModelId: "claude-3-7-sonnet@20250219:thinking",
| 683 | + vertexProjectId: "test-project",
| 684 | + vertexRegion: "us-central1",
| 685 | + modelMaxTokens: 1000, // This would result in 800 tokens for thinking, but minimum is 1024
| 686 | + })
| 687 | +
| 688 | + expect(handlerWithSmallMaxTokens.getModel().thinking).toMatchObject({ budget_tokens: 1024 })
| 689 | + })
| 690 | + // With no vertexThinking set, the budget is expected to come from anthropicThinking.
| 691 | + it("should use anthropicThinking value if vertexThinking is not provided", () => {
| 692 | + const fallbackHandler = new VertexHandler({
| 693 | + apiModelId: "claude-3-7-sonnet@20250219:thinking",
| 694 | + vertexProjectId: "test-project",
| 695 | + vertexRegion: "us-central1",
| 696 | + modelMaxTokens: 16384,
| 697 | + anthropicThinking: 6000, // Should be used as fallback
| 698 | + })
| 699 | + // Named `fallbackHandler` so it does not shadow the suite-level `handler` used by other tests.
| 700 | + expect(fallbackHandler.getModel().thinking).toMatchObject({ budget_tokens: 6000 })
| 701 | + })
| 702 | + // Checks that the thinking config and temperature are included in the arguments passed to `messages.create`.
| 703 | + it("should pass thinking configuration to API", async () => {
| 704 | + const thinkingHandler = new VertexHandler({
| 705 | + apiModelId: "claude-3-7-sonnet@20250219:thinking",
| 706 | + vertexProjectId: "test-project",
| 707 | + vertexRegion: "us-central1",
| 708 | + modelMaxTokens: 16384,
| 709 | + vertexThinking: 4096,
| 710 | + })
| 711 | + // Mock `create`: a plain message object for non-streaming calls, otherwise a one-event async stream.
| 712 | + const mockCreate = jest.fn().mockImplementation(async (options) => {
| 713 | + if (!options.stream) { // NOTE(review): presumably not reached via createMessage — confirm
| 714 | + return {
| 715 | + id: "test-completion",
| 716 | + content: [{ type: "text", text: "Test response" }],
| 717 | + role: "assistant",
| 718 | + model: options.model,
| 719 | + usage: {
| 720 | + input_tokens: 10,
| 721 | + output_tokens: 5,
| 722 | + },
| 723 | + }
| 724 | + }
| 725 | + return {
| 726 | + async *[Symbol.asyncIterator]() {
| 727 | + yield {
| 728 | + type: "message_start",
| 729 | + message: {
| 730 | + usage: {
| 731 | + input_tokens: 10,
| 732 | + output_tokens: 5,
| 733 | + },
| 734 | + },
| 735 | + }
| 736 | + },
| 737 | + }
| 738 | + })
| 739 | + ;(thinkingHandler["client"].messages as any).create = mockCreate
| 740 | + // Advance the generator a single step; the assertion below relies on `create` having been called by then.
| 741 | + await thinkingHandler
| 742 | + .createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])
| 743 | + .next()
| 744 | + // objectContaining: only the thinking config and temperature are pinned, other request fields are ignored.
| 745 | + expect(mockCreate).toHaveBeenCalledWith(
| 746 | + expect.objectContaining({
| 747 | + thinking: { type: "enabled", budget_tokens: 4096 },
| 748 | + temperature: 1.0, // Thinking requires temperature 1.0
| 749 | + }),
| 750 | + )
| 751 | + })
| 752 | + })
503 | 753 | }) |
0 commit comments