Skip to content

Conversation

codeflash-ai[bot]
Copy link

@codeflash-ai codeflash-ai bot commented Jul 22, 2025

📄 44% (0.44x) speedup for GroqModel._process_response in pydantic_ai_slim/pydantic_ai/models/groq.py

⏱️ Runtime : 365 microseconds 254 microseconds (best of 68 runs)

📝 Explanation and details

REFINEMENT
Key optimizations:

  • split_content_into_text_and_thinking now performs linear index scanning instead of repeatedly slicing and assigning new content strings (removes quadratic-like behavior on highly segmented text). No unnecessary reallocation or chained slicing.
  • In _process_response, changed the loop for tool calls to a generator inside .extend() to reduce method-call overhead.
  • Removed the local, redundant definition of number_to_datetime, using import instead.
  • Kept all function signatures and logic the same, but reduced unnecessary operations in parsing functions.
  • Maintained all necessary comments and docstrings.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 30 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Tests Coverage 100.0%
🌀 Generated Regression Tests and Runtime
from datetime import datetime
from typing import Any, Optional

# imports
import pytest
from pydantic_ai.models.groq import GroqModel

# function to test and dependencies (from user prompt, assumed already imported above)

# --- Mocks for groq.types.chat.ChatCompletion and related types ---

class MockUsage:
    def __init__(self, prompt_tokens=None, completion_tokens=None, total_tokens=None):
        self.prompt_tokens = prompt_tokens
        self.completion_tokens = completion_tokens
        self.total_tokens = total_tokens

class MockFunction:
    def __init__(self, name, arguments):
        self.name = name
        self.arguments = arguments

class MockToolCall:
    def __init__(self, id, function):
        self.id = id
        self.function = function

class MockMessage:
    def __init__(self, reasoning=None, content=None, tool_calls=None):
        self.reasoning = reasoning
        self.content = content
        self.tool_calls = tool_calls

class MockChoice:
    def __init__(self, message):
        self.message = message

class MockChatCompletion:
    def __init__(self, created, choices, model, id, usage=None):
        self.created = created
        self.choices = choices
        self.model = model
        self.id = id
        self.usage = usage
        self.x_groq = None  # for compatibility

# --- Fixtures and helpers ---

@pytest.fixture
def groq_model():
    # Provide a GroqModel instance with dummy fields for testing _process_response
    class DummyProvider:
        client = None
        model_profile = None
    return GroqModel(model_name="llama-3.3-70b-versatile", provider=DummyProvider())

# --- BASIC TEST CASES ---



def test_basic_reasoning_content_toolcall(groq_model):
    """Test with reasoning, content, and tool call all present."""
    reasoning = "Calculating sum."
    content = "The answer is 42."
    tool_call = MockToolCall(
        id="tool2",
        function=MockFunction(name="sum", arguments='{"a":40,"b":2}')
    )
    created = 1710003333
    model = "llama-3.3-70b-versatile"
    id = "abc101"
    usage_obj = MockUsage(prompt_tokens=4, completion_tokens=5, total_tokens=9)
    message = MockMessage(reasoning=reasoning, content=content, tool_calls=[tool_call])
    choice = MockChoice(message)
    response = MockChatCompletion(created, [choice], model, id, usage=usage_obj)

    codeflash_output = groq_model._process_response(response); result = codeflash_output # 3.25μs -> 3.29μs (1.25% slower)

# --- EDGE TEST CASES ---

def test_empty_content_and_reasoning(groq_model):
    """Test with both content and reasoning as None."""
    created = 1710004444
    model = "llama-3.3-70b-versatile"
    id = "abc202"
    usage_obj = MockUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0)
    message = MockMessage(reasoning=None, content=None, tool_calls=None)
    choice = MockChoice(message)
    response = MockChatCompletion(created, [choice], model, id, usage=usage_obj)

    codeflash_output = groq_model._process_response(response); result = codeflash_output # 1.79μs -> 1.83μs (2.24% slower)

def test_content_with_think_tags(groq_model):
    """Test with <think> tags in the content, should split into TextPart and ThinkingPart."""
    content = "This is a test. <think>I'm thinking</think> Now I'm done."
    created = 1710005555
    model = "llama-3.3-70b-versatile"
    id = "abc303"
    usage_obj = MockUsage(prompt_tokens=1, completion_tokens=2, total_tokens=3)
    message = MockMessage(reasoning=None, content=content, tool_calls=None)
    choice = MockChoice(message)
    response = MockChatCompletion(created, [choice], model, id, usage=usage_obj)

    codeflash_output = groq_model._process_response(response); result = codeflash_output # 3.54μs -> 3.50μs (1.20% faster)

def test_content_with_multiple_think_tags(groq_model):
    """Test with multiple <think>...</think> tags in the content."""
    content = "A<think>1</think>B<think>2</think>C"
    created = 1710006666
    model = "llama-3.3-70b-versatile"
    id = "abc404"
    usage_obj = MockUsage(prompt_tokens=2, completion_tokens=4, total_tokens=6)
    message = MockMessage(reasoning=None, content=content, tool_calls=None)
    choice = MockChoice(message)
    response = MockChatCompletion(created, [choice], model, id, usage=usage_obj)

    codeflash_output = groq_model._process_response(response); result = codeflash_output # 4.12μs -> 4.04μs (2.05% faster)

def test_content_with_unclosed_think_tag(groq_model):
    """Test with an unclosed <think> tag in the content."""
    content = "Start<think>unfinished"
    created = 1710007777
    model = "llama-3.3-70b-versatile"
    id = "abc505"
    usage_obj = MockUsage(prompt_tokens=1, completion_tokens=1, total_tokens=2)
    message = MockMessage(reasoning=None, content=content, tool_calls=None)
    choice = MockChoice(message)
    response = MockChatCompletion(created, [choice], model, id, usage=usage_obj)

    codeflash_output = groq_model._process_response(response); result = codeflash_output # 2.92μs -> 2.92μs (0.034% slower)

def test_tool_calls_multiple(groq_model):
    """Test with multiple tool calls."""
    tool_call1 = MockToolCall(
        id="toolA",
        function=MockFunction(name="search", arguments='{"q":"foo"}')
    )
    tool_call2 = MockToolCall(
        id="toolB",
        function=MockFunction(name="math", arguments='{"x":1,"y":2}')
    )
    created = 1710008888
    model = "llama-3.3-70b-versatile"
    id = "abc606"
    usage_obj = MockUsage(prompt_tokens=5, completion_tokens=10, total_tokens=15)
    message = MockMessage(reasoning=None, content=None, tool_calls=[tool_call1, tool_call2])
    choice = MockChoice(message)
    response = MockChatCompletion(created, [choice], model, id, usage=usage_obj)

    codeflash_output = groq_model._process_response(response); result = codeflash_output # 2.54μs -> 2.62μs (3.20% slower)

def test_missing_usage(groq_model):
    """Test with no usage object present."""
    content = "No usage!"
    created = 1710009999
    model = "llama-3.3-70b-versatile"
    id = "abc707"
    # usage is None
    message = MockMessage(reasoning=None, content=content, tool_calls=None)
    choice = MockChoice(message)
    response = MockChatCompletion(created, [choice], model, id, usage=None)

    codeflash_output = groq_model._process_response(response); result = codeflash_output # 2.29μs -> 2.58μs (11.3% slower)

def test_content_empty_string(groq_model):
    """Test with content as an empty string."""
    content = ""
    created = 1710011111
    model = "llama-3.3-70b-versatile"
    id = "abc808"
    usage_obj = MockUsage(prompt_tokens=1, completion_tokens=1, total_tokens=2)
    message = MockMessage(reasoning=None, content=content, tool_calls=None)
    choice = MockChoice(message)
    response = MockChatCompletion(created, [choice], model, id, usage=usage_obj)

    codeflash_output = groq_model._process_response(response); result = codeflash_output # 2.12μs -> 2.33μs (8.92% slower)

def test_tool_call_with_empty_function_args(groq_model):
    """Test with a tool call that has empty function arguments."""
    tool_call = MockToolCall(
        id="toolEmpty",
        function=MockFunction(name="noop", arguments='')
    )
    created = 1710012222
    model = "llama-3.3-70b-versatile"
    id = "abc909"
    usage_obj = MockUsage(prompt_tokens=1, completion_tokens=2, total_tokens=3)
    message = MockMessage(reasoning=None, content=None, tool_calls=[tool_call])
    choice = MockChoice(message)
    response = MockChatCompletion(created, [choice], model, id, usage=usage_obj)

    codeflash_output = groq_model._process_response(response); result = codeflash_output # 2.29μs -> 2.33μs (1.84% slower)

def test_content_with_adjacent_think_tags(groq_model):
    """Test with adjacent <think>...</think><think>...</think> tags in content."""
    content = "<think>foo</think><think>bar</think>"
    created = 1710013333
    model = "llama-3.3-70b-versatile"
    id = "abc010"
    usage_obj = MockUsage(prompt_tokens=2, completion_tokens=2, total_tokens=4)
    message = MockMessage(reasoning=None, content=content, tool_calls=None)
    choice = MockChoice(message)
    response = MockChatCompletion(created, [choice], model, id, usage=usage_obj)

    codeflash_output = groq_model._process_response(response); result = codeflash_output # 3.67μs -> 3.42μs (7.32% faster)

# --- LARGE SCALE TEST CASES ---


def test_large_content_with_think_tags(groq_model):
    """Test with large content containing many <think> tags."""
    # Create a content string with 500 <think> tags
    parts = []
    for i in range(500):
        parts.append(f"text{i} <think>think{i}</think> ")
    content = "".join(parts)
    created = 1710021111
    model = "llama-3.3-70b-versatile"
    id = "abc_large2"
    usage_obj = MockUsage(prompt_tokens=250, completion_tokens=250, total_tokens=500)
    message = MockMessage(reasoning=None, content=content, tool_calls=None)
    choice = MockChoice(message)
    response = MockChatCompletion(created, [choice], model, id, usage=usage_obj)

    codeflash_output = groq_model._process_response(response); result = codeflash_output # 319μs -> 206μs (54.5% faster)
    for i in range(0, 1000, 2):
        pass

def test_large_reasoning_and_content(groq_model):
    """Test with a long reasoning and long content."""
    reasoning = "R" * 1000
    content = "C" * 1000
    created = 1710022222
    model = "llama-3.3-70b-versatile"
    id = "abc_large3"
    usage_obj = MockUsage(prompt_tokens=500, completion_tokens=500, total_tokens=1000)
    message = MockMessage(reasoning=reasoning, content=content, tool_calls=None)
    choice = MockChoice(message)
    response = MockChatCompletion(created, [choice], model, id, usage=usage_obj)

    codeflash_output = groq_model._process_response(response); result = codeflash_output # 2.96μs -> 3.75μs (21.1% slower)



from abc import ABC
from dataclasses import dataclass, field
# function to test and all dependencies (from prompt, unchanged)
from datetime import datetime
from typing import Any, Generic, Literal, NamedTuple, TypeVar, Union

# imports
import pytest
from pydantic import TypeAdapter
from pydantic_ai.models.groq import GroqModel


# Message part classes
class ModelResponsePart: pass
class TextPart(ModelResponsePart):
    def __init__(self, content: str): self.content = content
    def __eq__(self, other): return isinstance(other, TextPart) and self.content == other.content
    def __repr__(self): return f"TextPart({self.content!r})"
class ThinkingPart(ModelResponsePart):
    def __init__(self, content: str): self.content = content
    def __eq__(self, other): return isinstance(other, ThinkingPart) and self.content == other.content
    def __repr__(self): return f"ThinkingPart({self.content!r})"
class ToolCallPart(ModelResponsePart):
    def __init__(self, tool_name: str, args: Any, tool_call_id: str): 
        self.tool_name = tool_name
        self.args = args
        self.tool_call_id = tool_call_id
    def __eq__(self, other): 
        return (isinstance(other, ToolCallPart) and 
                self.tool_name == other.tool_name and 
                self.args == other.args and 
                self.tool_call_id == other.tool_call_id)
    def __repr__(self): return f"ToolCallPart({self.tool_name!r}, {self.args!r}, {self.tool_call_id!r})"

class ModelResponse:
    def __init__(self, items, usage, model_name, timestamp, vendor_id):
        self.items = items
        self.usage = usage
        self.model_name = model_name
        self.timestamp = timestamp
        self.vendor_id = vendor_id
    def __eq__(self, other):
        return (isinstance(other, ModelResponse) and
                self.items == other.items and
                self.usage == other.usage and
                self.model_name == other.model_name and
                self.timestamp == other.timestamp and
                self.vendor_id == other.vendor_id)
    def __repr__(self):
        return (f"ModelResponse(items={self.items!r}, usage={self.usage!r}, "
                f"model_name={self.model_name!r}, timestamp={self.timestamp!r}, vendor_id={self.vendor_id!r})")

# Usage class for tracking tokens
class Usage:
    def __init__(self, request_tokens=0, response_tokens=0, total_tokens=0):
        self.request_tokens = request_tokens
        self.response_tokens = response_tokens
        self.total_tokens = total_tokens
    def __eq__(self, other):
        return (isinstance(other, Usage) and
                self.request_tokens == other.request_tokens and
                self.response_tokens == other.response_tokens and
                self.total_tokens == other.total_tokens)
    def __repr__(self):
        return (f"Usage(request_tokens={self.request_tokens}, response_tokens={self.response_tokens}, "
                f"total_tokens={self.total_tokens})")

# Simulate groq.types.chat.ChatCompletion and related classes
class UsageObj:
    def __init__(self, prompt_tokens, completion_tokens, total_tokens):
        self.prompt_tokens = prompt_tokens
        self.completion_tokens = completion_tokens
        self.total_tokens = total_tokens

class ToolFunction:
    def __init__(self, name, arguments):
        self.name = name
        self.arguments = arguments

class ToolCall:
    def __init__(self, id, function):
        self.id = id
        self.function = function

class Message:
    def __init__(self, reasoning=None, content=None, tool_calls=None):
        self.reasoning = reasoning
        self.content = content
        self.tool_calls = tool_calls

class Choice:
    def __init__(self, message):
        self.message = message

class ChatCompletion:
    def __init__(self, created, choices, usage, model, id):
        self.created = created
        self.choices = choices
        self.usage = usage
        self.model = model
        self.id = id

# The function under test
def _process_response(response: ChatCompletion) -> ModelResponse:
    timestamp = number_to_datetime(response.created)
    choice = response.choices[0]
    items: list[ModelResponsePart] = []
    if getattr(choice.message, 'reasoning', None) is not None:
        items.append(ThinkingPart(content=choice.message.reasoning))
    if getattr(choice.message, 'content', None) is not None:
        items.extend(split_content_into_text_and_thinking(choice.message.content))
    if getattr(choice.message, 'tool_calls', None) is not None:
        for c in choice.message.tool_calls:
            items.append(ToolCallPart(tool_name=c.function.name, args=c.function.arguments, tool_call_id=c.id))
    return ModelResponse(
        items, usage=_map_usage(response), model_name=response.model, timestamp=timestamp, vendor_id=response.id
    )

# -----------------
# UNIT TESTS BELOW
# -----------------

# Helper for datetime comparison
def dt_eq(dt1, dt2):
    # Allow both int/float epoch and datetime
    if isinstance(dt1, datetime) and isinstance(dt2, datetime):
        return abs(dt1.timestamp() - dt2.timestamp()) < 1e-6
    return False

# 1. BASIC TEST CASES

To edit these changes git checkout codeflash/optimize-GroqModel._process_response-mddw2rmc and push.

Codeflash

REFINEMENT 
**Key optimizations:**
- `split_content_into_text_and_thinking` now performs linear index scanning instead of repeatedly slicing and assigning new content strings (removes quadratic-like behavior on highly segmented text). No unnecessary reallocation or chained slicing.
- In `_process_response`, changed the loop for tool calls to a generator inside `.extend()` to reduce method-call overhead.
- Removed the local, redundant definition of `number_to_datetime`, using import instead.
- Kept all function signatures and logic the same, but reduced unnecessary operations in parsing functions.  
- Maintained all necessary comments and docstrings.
@codeflash-ai codeflash-ai bot added the ⚡️ codeflash Optimization PR opened by Codeflash AI label Jul 22, 2025
@codeflash-ai codeflash-ai bot requested a review from aseembits93 July 22, 2025 02:02
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
⚡️ codeflash Optimization PR opened by Codeflash AI
Projects
None yet
Development

Successfully merging this pull request may close these issues.

0 participants