
Commit 419c173

feat(groq): openai-oss (#32411)
Use the new openai-oss model for integration tests, set module-level testing model names, and improve the robustness of the tool tests.
1 parent: 4011257
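Stated as a pattern, the change is small but useful: every test now reads its model name from a module-level constant, so pointing the whole suite at a different model is a one-line edit. A minimal sketch of the pattern (the constant values come from the diff below; the test body is illustrative, not part of the commit):

from langchain_groq import ChatGroq

# Module-level model names shared by the integration tests (values from this commit).
DEFAULT_MODEL_NAME = "openai/gpt-oss-20b"
REASONING_MODEL_NAME = "deepseek-r1-distill-llama-70b"


def test_invoke() -> None:
    # Each test builds its ChatGroq client from the shared constant,
    # so swapping models for the whole suite means editing one line above.
    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)
    result = chat.invoke("Hello!")
    assert isinstance(result.content, str)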

File tree

1 file changed: +35 -32 lines

libs/partners/groq/tests/integration_tests/test_chat_models.py

Lines changed: 35 additions & 32 deletions
@@ -24,7 +24,8 @@
     FakeCallbackHandlerWithChatStart,
 )

-MODEL_NAME = "llama-3.3-70b-versatile"
+DEFAULT_MODEL_NAME = "openai/gpt-oss-20b"
+REASONING_MODEL_NAME = "deepseek-r1-distill-llama-70b"


 #
@@ -34,7 +35,7 @@
 def test_invoke() -> None:
     """Test Chat wrapper."""
     chat = ChatGroq(
-        model=MODEL_NAME,
+        model=DEFAULT_MODEL_NAME,
         temperature=0.7,
         base_url=None,
         groq_proxy=None,
@@ -55,7 +56,7 @@ def test_invoke() -> None:
 @pytest.mark.scheduled
 async def test_ainvoke() -> None:
     """Test ainvoke tokens from ChatGroq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)

     result = await chat.ainvoke("Welcome to the Groqetship!", config={"tags": ["foo"]})
     assert isinstance(result, BaseMessage)
@@ -65,7 +66,7 @@ async def test_ainvoke() -> None:
 @pytest.mark.scheduled
 def test_batch() -> None:
     """Test batch tokens from ChatGroq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)

     result = chat.batch(["Hello!", "Welcome to the Groqetship!"])
     for token in result:
@@ -76,7 +77,7 @@ def test_batch() -> None:
 @pytest.mark.scheduled
 async def test_abatch() -> None:
     """Test abatch tokens from ChatGroq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)

     result = await chat.abatch(["Hello!", "Welcome to the Groqetship!"])
     for token in result:
@@ -87,7 +88,7 @@ async def test_abatch() -> None:
 @pytest.mark.scheduled
 async def test_stream() -> None:
     """Test streaming tokens from Groq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)

     for token in chat.stream("Welcome to the Groqetship!"):
         assert isinstance(token, BaseMessageChunk)
@@ -97,7 +98,7 @@ async def test_stream() -> None:
 @pytest.mark.scheduled
 async def test_astream() -> None:
     """Test streaming tokens from Groq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)

     full: Optional[BaseMessageChunk] = None
     chunks_with_token_counts = 0
@@ -136,7 +137,7 @@ async def test_astream() -> None:
 def test_generate() -> None:
     """Test sync generate."""
     n = 1
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)
     message = HumanMessage(content="Hello", n=1)
     response = chat.generate([[message], [message]])
     assert isinstance(response, LLMResult)
@@ -155,7 +156,7 @@ def test_generate() -> None:
 async def test_agenerate() -> None:
     """Test async generation."""
     n = 1
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10, n=1)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10, n=1)
     message = HumanMessage(content="Hello")
     response = await chat.agenerate([[message], [message]])
     assert isinstance(response, LLMResult)
@@ -178,7 +179,7 @@ def test_invoke_streaming() -> None:
     """Test that streaming correctly invokes on_llm_new_token callback."""
     callback_handler = FakeCallbackHandler()
     chat = ChatGroq(
-        model=MODEL_NAME,
+        model=DEFAULT_MODEL_NAME,
         max_tokens=2,
         streaming=True,
         temperature=0,
@@ -195,7 +196,7 @@ async def test_agenerate_streaming() -> None:
     """Test that streaming correctly invokes on_llm_new_token callback."""
     callback_handler = FakeCallbackHandlerWithChatStart()
     chat = ChatGroq(
-        model=MODEL_NAME,
+        model=DEFAULT_MODEL_NAME,
         max_tokens=10,
         streaming=True,
         temperature=0,
@@ -222,7 +223,7 @@ async def test_agenerate_streaming() -> None:
 def test_reasoning_output_invoke() -> None:
     """Test reasoning output from ChatGroq with invoke."""
     chat = ChatGroq(
-        model="deepseek-r1-distill-llama-70b",
+        model=REASONING_MODEL_NAME,
         reasoning_format="parsed",
     )
     message = [
@@ -241,7 +242,7 @@ def test_reasoning_output_invoke() -> None:
 def test_reasoning_output_stream() -> None:
     """Test reasoning output from ChatGroq with stream."""
     chat = ChatGroq(
-        model="deepseek-r1-distill-llama-70b",
+        model=REASONING_MODEL_NAME,
         reasoning_format="parsed",
     )
     message = [
@@ -300,7 +301,7 @@ def on_llm_end(

     callback = _FakeCallback()
     chat = ChatGroq(
-        model=MODEL_NAME,
+        model="llama-3.1-8b-instant",  # Use a model that properly streams content
         max_tokens=2,
         temperature=0,
         callbacks=[callback],
@@ -314,7 +315,7 @@ def on_llm_end(

 def test_system_message() -> None:
     """Test ChatGroq wrapper with system message."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)
     system_message = SystemMessage(content="You are to chat with the user.")
     human_message = HumanMessage(content="Hello")
     response = chat.invoke([system_message, human_message])
@@ -324,15 +325,15 @@ def test_system_message() -> None:

 def test_tool_choice() -> None:
     """Test that tool choice is respected."""
-    llm = ChatGroq(model=MODEL_NAME)
+    llm = ChatGroq(model=DEFAULT_MODEL_NAME)

     class MyTool(BaseModel):
         name: str
         age: int

     with_tool = llm.bind_tools([MyTool], tool_choice="MyTool")

-    resp = with_tool.invoke("Who was the 27 year old named Erick?")
+    resp = with_tool.invoke("Who was the 27 year old named Erick? Use the tool.")
     assert isinstance(resp, AIMessage)
     assert resp.content == ""  # should just be tool call
     tool_calls = resp.additional_kwargs["tool_calls"]
@@ -354,15 +355,15 @@ class MyTool(BaseModel):

 def test_tool_choice_bool() -> None:
     """Test that tool choice is respected just passing in True."""
-    llm = ChatGroq(model=MODEL_NAME)
+    llm = ChatGroq(model=DEFAULT_MODEL_NAME)

     class MyTool(BaseModel):
         name: str
         age: int

     with_tool = llm.bind_tools([MyTool], tool_choice=True)

-    resp = with_tool.invoke("Who was the 27 year old named Erick?")
+    resp = with_tool.invoke("Who was the 27 year old named Erick? Use the tool.")
     assert isinstance(resp, AIMessage)
     assert resp.content == ""  # should just be tool call
     tool_calls = resp.additional_kwargs["tool_calls"]
@@ -379,7 +380,7 @@ class MyTool(BaseModel):
 @pytest.mark.xfail(reason="Groq tool_choice doesn't currently force a tool call")
 def test_streaming_tool_call() -> None:
     """Test that tool choice is respected."""
-    llm = ChatGroq(model=MODEL_NAME)
+    llm = ChatGroq(model=DEFAULT_MODEL_NAME)

     class MyTool(BaseModel):
         name: str
@@ -417,7 +418,7 @@ class MyTool(BaseModel):
 @pytest.mark.xfail(reason="Groq tool_choice doesn't currently force a tool call")
 async def test_astreaming_tool_call() -> None:
     """Test that tool choice is respected."""
-    llm = ChatGroq(model=MODEL_NAME)
+    llm = ChatGroq(model=DEFAULT_MODEL_NAME)

     class MyTool(BaseModel):
         name: str
@@ -462,7 +463,9 @@ class Joke(BaseModel):
         setup: str = Field(description="question to set up a joke")
         punchline: str = Field(description="answer to resolve the joke")

-    chat = ChatGroq(model=MODEL_NAME).with_structured_output(Joke, method="json_mode")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME).with_structured_output(
+        Joke, method="json_mode"
+    )
     result = chat.invoke(
         "Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys"
     )
@@ -476,38 +479,38 @@ def test_setting_service_tier_class() -> None:
     message = HumanMessage(content="Welcome to the Groqetship")

     # Initialization
-    chat = ChatGroq(model=MODEL_NAME, service_tier="auto")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="auto")
     assert chat.service_tier == "auto"
     response = chat.invoke([message])
     assert isinstance(response, BaseMessage)
     assert isinstance(response.content, str)
     assert response.response_metadata.get("service_tier") == "auto"

-    chat = ChatGroq(model=MODEL_NAME, service_tier="flex")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="flex")
     assert chat.service_tier == "flex"
     response = chat.invoke([message])
     assert response.response_metadata.get("service_tier") == "flex"

-    chat = ChatGroq(model=MODEL_NAME, service_tier="on_demand")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="on_demand")
     assert chat.service_tier == "on_demand"
     response = chat.invoke([message])
     assert response.response_metadata.get("service_tier") == "on_demand"

-    chat = ChatGroq(model=MODEL_NAME)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME)
     assert chat.service_tier == "on_demand"
     response = chat.invoke([message])
     assert response.response_metadata.get("service_tier") == "on_demand"

     with pytest.raises(ValueError):
-        ChatGroq(model=MODEL_NAME, service_tier=None)  # type: ignore[arg-type]
+        ChatGroq(model=DEFAULT_MODEL_NAME, service_tier=None)  # type: ignore[arg-type]
     with pytest.raises(ValueError):
-        ChatGroq(model=MODEL_NAME, service_tier="invalid")  # type: ignore[arg-type]
+        ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="invalid")  # type: ignore[arg-type]


 def test_setting_service_tier_request() -> None:
     """Test setting service tier defined at request level."""
     message = HumanMessage(content="Welcome to the Groqetship")
-    chat = ChatGroq(model=MODEL_NAME)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME)

     response = chat.invoke(
         [message],
@@ -537,7 +540,7 @@ def test_setting_service_tier_request() -> None:

     # If an `invoke` call is made with no service tier, we fall back to the class level
     # setting
-    chat = ChatGroq(model=MODEL_NAME, service_tier="auto")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="auto")
     response = chat.invoke(
         [message],
     )
@@ -564,15 +567,15 @@ def test_setting_service_tier_request() -> None:

 def test_setting_service_tier_streaming() -> None:
     """Test service tier settings for streaming calls."""
-    chat = ChatGroq(model=MODEL_NAME, service_tier="flex")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="flex")
     chunks = list(chat.stream("Why is the sky blue?", service_tier="auto"))

     assert chunks[-1].response_metadata.get("service_tier") == "auto"


 async def test_setting_service_tier_request_async() -> None:
     """Test async setting of service tier at the request level."""
-    chat = ChatGroq(model=MODEL_NAME, service_tier="flex")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="flex")
     response = await chat.ainvoke("Hello!", service_tier="on_demand")

     assert response.response_metadata.get("service_tier") == "on_demand"
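
The tool-test robustness change is purely prompt-level: appending an explicit "Use the tool." instruction makes the model far more likely to emit a tool call instead of a prose answer. A minimal sketch of the pattern the updated tests exercise (it mirrors the diff above; the assertions follow the tests' OpenAI-style additional_kwargs access):

from langchain_groq import ChatGroq
from pydantic import BaseModel


class MyTool(BaseModel):
    name: str
    age: int


llm = ChatGroq(model="openai/gpt-oss-20b")
with_tool = llm.bind_tools([MyTool], tool_choice="MyTool")

# The trailing "Use the tool." nudges the model toward calling the tool,
# which is what the tightened assertions depend on.
resp = with_tool.invoke("Who was the 27 year old named Erick? Use the tool.")
assert resp.content == ""  # should just be a tool call
tool_calls = resp.additional_kwargs["tool_calls"]
assert tool_calls[0]["function"]["name"] == "MyTool"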
