|
3 | 3 | from unittest.mock import AsyncMock, MagicMock, patch |
4 | 4 |
|
5 | 5 | import pytest |
6 | | -from llama_index.core.base.llms.types import ChatMessage |
| 6 | +from llama_index.core.base.llms.types import ChatMessage, ThinkingBlock, TextBlock |
7 | 7 | from llama_index.llms.openai import OpenAI |
8 | 8 | from llama_index.llms.openai.utils import O1_MODELS |
9 | 9 |
|
@@ -601,3 +601,227 @@ def test_reasoning_effort_none_default(): |
601 | 601 | llm = OpenAI(model=model_name, api_key="test-key") |
602 | 602 | kwargs = llm._get_model_kwargs() |
603 | 603 | assert "reasoning_effort" not in kwargs |
| 604 | + |
| 605 | + |
| 606 | +# ===== reasoning_content tests (OpenAI-compatible providers) ===== |
| 607 | + |
| 608 | + |
def _make_chunk(
    delta_kwargs: dict, finish_reason: Optional[str] = None
) -> ChatCompletionChunk:
    """Build a single ``ChatCompletionChunk`` for streaming tests.

    Args:
        delta_kwargs: Keyword arguments for ``ChoiceDelta``. A special
            ``"__extra__"`` key is removed and attached to the delta as
            pydantic extra fields, simulating provider-specific fields
            such as ``reasoning_content``.
        finish_reason: Optional finish reason for the single choice.

    Returns:
        A chunk with one choice at index 0.
    """
    # Copy first so the pop below does not mutate the caller's dict.
    delta_kwargs = dict(delta_kwargs)
    extra = delta_kwargs.pop("__extra__", None)
    chunk = ChatCompletionChunk(
        id="chatcmpl-reasoning",
        object="chat.completion.chunk",
        created=1700000000,
        model="qwen3-thinking",
        choices=[
            ChunkChoice(
                delta=ChoiceDelta(**delta_kwargs),
                finish_reason=finish_reason,
                index=0,
            )
        ],
    )
    if extra:
        # __pydantic_extra__ is how pydantic v2 stores unknown fields;
        # set it directly since the OpenAI model class may not accept
        # arbitrary kwargs at construction time.
        chunk.choices[0].delta.__pydantic_extra__ = extra
    return chunk
| 630 | + |
| 631 | + |
def _make_reasoning_stream_chunks() -> list[ChatCompletionChunk]:
    """Simulate an OpenAI-compatible API streaming reasoning_content then content."""
    reasoning_parts = ["Let me think", " about this."]
    answer_parts = ["The answer", " is 42."]

    chunks = [_make_chunk({"role": "assistant"})]
    chunks.extend(
        _make_chunk({"content": None, "__extra__": {"reasoning_content": part}})
        for part in reasoning_parts
    )
    chunks.extend(_make_chunk({"content": part}) for part in answer_parts)
    chunks.append(_make_chunk({}, finish_reason="stop"))
    return chunks
| 646 | + |
| 647 | + |
@patch("llama_index.llms.openai.base.SyncOpenAI")
def test_stream_chat_reasoning_content(MockSyncOpenAI: MagicMock) -> None:
    """Test that reasoning_content from streaming is captured as ThinkingBlock and thinking_delta."""
    with CachedOpenAIApiKeys(set_fake_key=True):
        client = MockSyncOpenAI.return_value
        client.chat.completions.create.return_value = iter(
            _make_reasoning_stream_chunks()
        )

        llm = OpenAI(model="gpt-4o", api_key="test-key")
        stream = llm.stream_chat([ChatMessage(role="user", content="test")])
        responses = list(stream)

        # Partition the final message's blocks by type in a single pass.
        last = responses[-1]
        thinking_blocks = []
        text_blocks = []
        for block in last.message.blocks:
            if isinstance(block, ThinkingBlock):
                thinking_blocks.append(block)
            elif isinstance(block, TextBlock):
                text_blocks.append(block)

        assert len(thinking_blocks) == 1
        assert thinking_blocks[0].content == "Let me think about this."
        assert len(text_blocks) == 1
        assert text_blocks[0].text == "The answer is 42."

        # Exactly 2 chunks carry thinking_delta (the two reasoning chunks)
        deltas = [
            r.additional_kwargs["thinking_delta"]
            for r in responses
            if r.additional_kwargs.get("thinking_delta")
        ]
        assert deltas == ["Let me think", " about this."]
| 678 | + |
| 679 | + |
@pytest.mark.asyncio()
@patch("llama_index.llms.openai.base.AsyncOpenAI")
async def test_astream_chat_reasoning_content(MockAsyncOpenAI: MagicMock) -> None:
    """Test that reasoning_content from async streaming is captured as ThinkingBlock."""
    client = MockAsyncOpenAI.return_value

    async def fake_stream(*args: Any, **kwargs: Any) -> AsyncGenerator:
        for chunk in _make_reasoning_stream_chunks():
            yield chunk

    # AsyncMock so awaiting create(...) resolves to the async generator.
    mock_create = AsyncMock()
    mock_create.return_value = fake_stream()
    client.chat.completions.create = mock_create

    llm = OpenAI(model="gpt-4o", api_key="test-key")
    gen = await llm.astream_chat([ChatMessage(role="user", content="test")])
    responses = []
    async for item in gen:
        responses.append(item)

    last = responses[-1]
    thinking_blocks = [
        b for b in last.message.blocks if isinstance(b, ThinkingBlock)
    ]
    text_blocks = [b for b in last.message.blocks if isinstance(b, TextBlock)]

    assert len(thinking_blocks) == 1
    assert thinking_blocks[0].content == "Let me think about this."
    assert len(text_blocks) == 1
    assert text_blocks[0].text == "The answer is 42."

    # Verify thinking_delta on async path too
    delta_count = sum(
        1 for r in responses if r.additional_kwargs.get("thinking_delta")
    )
    assert delta_count == 2
| 712 | + |
| 713 | + |
@patch("llama_index.llms.openai.base.SyncOpenAI")
def test_chat_reasoning_content_non_streaming(MockSyncOpenAI: MagicMock) -> None:
    """Test that reasoning_content in non-streaming responses is captured as ThinkingBlock."""
    with CachedOpenAIApiKeys(set_fake_key=True):
        assistant_message = ChatCompletionMessage(
            role="assistant",
            content="The answer is 42.",
        )
        completion = ChatCompletion(
            id="chatcmpl-reasoning",
            object="chat.completion",
            created=1700000000,
            model="qwen3-thinking",
            choices=[
                Choice(
                    message=assistant_message,
                    finish_reason="stop",
                    index=0,
                )
            ],
        )
        # Attach the provider-specific extra field after construction,
        # as pydantic v2 stores unknown fields in __pydantic_extra__.
        completion.choices[0].message.__pydantic_extra__ = {
            "reasoning_content": "Let me think step by step..."
        }

        MockSyncOpenAI.return_value.chat.completions.create.return_value = completion

        llm = OpenAI(model="gpt-4o", api_key="test-key")
        result = llm.chat([ChatMessage(role="user", content="test")])

        blocks = result.message.blocks
        thinking_blocks = [b for b in blocks if isinstance(b, ThinkingBlock)]
        text_blocks = [b for b in blocks if isinstance(b, TextBlock)]

        assert len(thinking_blocks) == 1
        assert thinking_blocks[0].content == "Let me think step by step..."
        assert len(text_blocks) == 1
        assert text_blocks[0].text == "The answer is 42."
| 753 | + |
| 754 | + |
@patch("llama_index.llms.openai.base.SyncOpenAI")
def test_stream_chat_no_reasoning_content(MockSyncOpenAI: MagicMock) -> None:
    """Test that streaming without reasoning_content produces no ThinkingBlock."""
    with CachedOpenAIApiKeys(set_fake_key=True):
        client = MockSyncOpenAI.return_value
        client.chat.completions.create.return_value = (
            mock_chat_completion_stream_v1()
        )

        llm = OpenAI(model="gpt-4o", api_key="test-key")
        responses = list(
            llm.stream_chat([ChatMessage(role="user", content="test")])
        )

        last = responses[-1]
        assert not any(
            isinstance(block, ThinkingBlock) for block in last.message.blocks
        )
        assert last.message.content == "\n\n2"
| 773 | + |
| 774 | + |
def test_to_openai_message_dict_skips_thinking_block() -> None:
    """Test that ThinkingBlock is skipped when converting messages to OpenAI format."""
    from llama_index.llms.openai.utils import to_openai_message_dict

    blocks = [
        ThinkingBlock(content="internal reasoning"),
        TextBlock(text="The answer is 42."),
    ]
    converted = to_openai_message_dict(
        ChatMessage(role="assistant", blocks=blocks)
    )

    assert converted["role"] == "assistant"
    # Only the text block survives the conversion.
    assert converted["content"] == "The answer is 42."
| 790 | + |
| 791 | + |
def test_from_openai_message_with_reasoning_content() -> None:
    """Test that from_openai_message extracts reasoning_content as ThinkingBlock."""
    from llama_index.llms.openai.utils import from_openai_message

    raw = ChatCompletionMessage(
        role="assistant",
        content="The answer is 42.",
    )
    # Simulate a provider-specific extra field on the pydantic model.
    raw.__pydantic_extra__ = {"reasoning_content": "Let me think..."}

    converted = from_openai_message(raw, modalities=["text"])

    thinking = [b for b in converted.blocks if isinstance(b, ThinkingBlock)]
    texts = [b for b in converted.blocks if isinstance(b, TextBlock)]

    assert len(thinking) == 1
    assert thinking[0].content == "Let me think..."
    assert len(texts) == 1
    assert texts[0].text == "The answer is 42."
| 811 | + |
| 812 | + |
def test_from_openai_message_without_reasoning_content() -> None:
    """Test that from_openai_message works normally without reasoning_content."""
    from llama_index.llms.openai.utils import from_openai_message

    raw = ChatCompletionMessage(
        role="assistant",
        content="Hello!",
    )

    converted = from_openai_message(raw, modalities=["text"])

    assert not [b for b in converted.blocks if isinstance(b, ThinkingBlock)]
    assert len(converted.blocks) == 1
    assert converted.blocks[0].text == "Hello!"
0 commit comments