
Commit 70a76c2

feat(models): add gemini 3 and add forced params to models
1 parent: 2c78dcb

File tree: 11 files changed, +136 −21 lines


AgentCrew/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-__version__ = "0.8.2"
+__version__ = "0.8.3"

AgentCrew/modules/custom_llm/service.py

Lines changed: 25 additions & 3 deletions
@@ -121,16 +121,38 @@ async def stream_assistant_response(self, messages):
             # "max_tokens": 16000,
         }
         stream_params["temperature"] = self.temperature
-        stream_params["extra_body"] = {"min_p": 0.1}
+        stream_params["extra_body"] = {"min_p": 0.02}
 
+        full_model_id = f"{self._provider_name}/{self.model}"
+
+        forced_sample_params = ModelRegistry.get_model_sample_params(full_model_id)
+        if forced_sample_params:
+            if forced_sample_params.temperature is not None:
+                stream_params["temperature"] = forced_sample_params.temperature
+            if forced_sample_params.top_p is not None:
+                stream_params["top_p"] = forced_sample_params.top_p
+            if forced_sample_params.top_k is not None:
+                stream_params["extra_body"]["top_k"] = forced_sample_params.top_k
+            if forced_sample_params.frequency_penalty is not None:
+                stream_params["frequency_penalty"] = (
+                    forced_sample_params.frequency_penalty
+                )
+            if forced_sample_params.presence_penalty is not None:
+                stream_params["presence_penalty"] = (
+                    forced_sample_params.presence_penalty
+                )
+            if forced_sample_params.repetition_penalty is not None:
+                stream_params["extra_body"]["repetition_penalty"] = (
+                    forced_sample_params.repetition_penalty
+                )
+            if forced_sample_params.min_p is not None:
+                stream_params["extra_body"]["min_p"] = forced_sample_params.min_p
         # Add system message if provided
         if self.system_prompt:
             stream_params["messages"] = self._convert_internal_format(
                 [{"role": "system", "content": self.system_prompt}] + messages
             )
 
-        full_model_id = f"{self._provider_name}/{self.model}"
-
         # Add tools if available
         if self.tools and "tool_use" in ModelRegistry.get_model_capabilities(
             full_model_id
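
Every provider service touched by this commit applies the same pattern: look up the model's pinned sampling parameters in the registry and let them override whatever the user configured, routing knobs the OpenAI-compatible API doesn't accept at the top level (top_k, min_p, repetition_penalty) through extra_body. A minimal self-contained sketch of that pattern, with a stand-in lookup dict instead of ModelRegistry and a made-up model id:

    # Sketch of the override pattern used across the services in this commit.
    # `lookup` stands in for ModelRegistry; the model id is invented.
    from typing import Optional
    from pydantic import BaseModel


    class SampleParam(BaseModel):  # mirrors AgentCrew/modules/llm/types.py
        temperature: Optional[float] = None
        top_p: Optional[float] = None
        top_k: Optional[int] = None
        min_p: Optional[float] = None


    lookup = {"deepinfra/qwen-3-32b": SampleParam(temperature=0.6, top_p=0.95, top_k=20)}


    def apply_forced_params(stream_params: dict, full_model_id: str) -> dict:
        forced = lookup.get(full_model_id)
        if forced is None:
            return stream_params
        # Standard OpenAI-compatible knobs override the user's values directly...
        for field in ("temperature", "top_p"):
            if getattr(forced, field) is not None:
                stream_params[field] = getattr(forced, field)
        # ...while non-standard ones ride along in extra_body.
        extra = stream_params.setdefault("extra_body", {})
        for field in ("top_k", "min_p"):
            if getattr(forced, field) is not None:
                extra[field] = getattr(forced, field)
        return stream_params


    print(apply_forced_params({"temperature": 1.0}, "deepinfra/qwen-3-32b"))
    # {'temperature': 0.6, 'top_p': 0.95, 'extra_body': {'top_k': 20}}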

AgentCrew/modules/google/native_service.py

Lines changed: 13 additions & 0 deletions
@@ -393,6 +393,19 @@ async def stream_assistant_response(self, messages: List[Dict[str, Any]]) -> Any
             top_p=0.95,
         )
 
+        forced_sample_params = ModelRegistry.get_model_sample_params(full_model_id)
+        if forced_sample_params:
+            if forced_sample_params.temperature is not None:
+                config.temperature = forced_sample_params.temperature
+            if forced_sample_params.top_p is not None:
+                config.top_p = forced_sample_params.top_p
+            if forced_sample_params.top_k is not None:
+                config.top_k = forced_sample_params.top_k
+            if forced_sample_params.frequency_penalty is not None:
+                config.frequency_penalty = forced_sample_params.frequency_penalty
+            if forced_sample_params.presence_penalty is not None:
+                config.presence_penalty = forced_sample_params.presence_penalty
+
         # Add system instruction if available
         if self.system_prompt:
             config.system_instruction = self.system_prompt

AgentCrew/modules/llm/constants.py

Lines changed: 38 additions & 1 deletion
@@ -1,4 +1,4 @@
-from .types import Model
+from .types import Model, SampleParam
 
 _ANTHROPIC_MODELS = [
     Model(
@@ -227,6 +227,17 @@
         input_token_price_1m=1.25,
         output_token_price_1m=10,
     ),
+    Model(
+        id="gemini-3-pro-preview",
+        provider="google",
+        name="Gemini 3 Pro",
+        max_context_token=1_000_000,
+        description="Google's most intelligent model family to date, built on a foundation of state-of-the-art reasoning",
+        capabilities=["tool_use", "thinking", "vision", "structured_output"],
+        force_sample_params=SampleParam(temperature=1.0),
+        input_token_price_1m=2,
+        output_token_price_1m=12,
+    ),
 ]
 
 _DEEPINFRA_MODELS = [
@@ -254,6 +265,9 @@
         name="Qwen 3 Coder",
         description="Qwen3-Coder-480B-A35B-Instruct is the Qwen3's most agentic code model",
         capabilities=["tool_use", "stream", "structured_output"],
+        force_sample_params=SampleParam(
+            temperature=0.7, top_p=0.8, top_k=20, repetition_penalty=1.05
+        ),
         input_token_price_1m=0.4,
         output_token_price_1m=1.6,
     ),
@@ -263,6 +277,9 @@
         name="Qwen 3 Coder",
         description="Qwen3-Coder-480B-A35B-Instruct is the Qwen3's most agentic code model",
         capabilities=["tool_use", "stream", "structured_output"],
+        force_sample_params=SampleParam(
+            temperature=0.7, top_p=0.8, top_k=20, min_p=0.0
+        ),
         input_token_price_1m=0.14,
         output_token_price_1m=1.1,
     ),
@@ -272,6 +289,9 @@
         name="Qwen 3 MoE 235B-22B",
         description="Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models",
         capabilities=["tool_use", "thinking", "stream", "structured_output"],
+        force_sample_params=SampleParam(
+            temperature=0.6, top_p=0.95, top_k=20, min_p=0.0
+        ),
         input_token_price_1m=0.2,
         output_token_price_1m=0.6,
     ),
@@ -280,6 +300,7 @@
         provider="deepinfra",
         name="Zai GLM-4.6",
         description="The GLM-4.6 series models are foundation models designed for intelligent agents",
+        force_sample_params=SampleParam(temperature=1, top_p=0.95, top_k=40),
         capabilities=["tool_use", "stream", "structured_output"],
         input_token_price_1m=0.6,
         output_token_price_1m=2.0,
@@ -290,6 +311,9 @@
         name="Qwen 3 32B",
         description="Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models",
         capabilities=["tool_use", "stream", "structured_output"],
+        force_sample_params=SampleParam(
+            temperature=0.6, top_p=0.95, top_k=20, min_p=0.0
+        ),
         input_token_price_1m=0.1,
         output_token_price_1m=0.3,
     ),
@@ -308,6 +332,7 @@
         name="DeepSeek R1 0528",
         description="The DeepSeek R1 model has undergone a minor version upgrade, with the current version being DeepSeek-R1-0528.",
         capabilities=["tool_use", "thinking", "stream", "structured_output"],
+        force_sample_params=SampleParam(temperature=0.6),
         input_token_price_1m=0.5,
         output_token_price_1m=2.18,
     ),
@@ -317,6 +342,7 @@
         name="Kimi K2 Instruct",
         description="Kimi K2 is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass",
         capabilities=["tool_use", "stream", "structured_output"],
+        force_sample_params=SampleParam(temperature=0.6),
         input_token_price_1m=0.5,
         output_token_price_1m=2.0,
     ),
@@ -342,6 +368,17 @@
         input_token_price_1m=0.0,
         output_token_price_1m=0.0,
     ),
+    Model(
+        id="gemini-3-pro-preview",
+        provider="github_copilot",
+        name="Gemini 3 Pro",
+        description="",
+        capabilities=["tool_use", "vision", "stream"],
+        default=False,
+        input_token_price_1m=0.0,
+        force_sample_params=SampleParam(temperature=1.0),
+        output_token_price_1m=0.0,
+    ),
     Model(
         id="gpt-4.1",
         provider="github_copilot",

AgentCrew/modules/llm/model_registry.py

Lines changed: 9 additions & 0 deletions
@@ -46,6 +46,15 @@ def get_model_limit(cls, mode_id):
             return 128_000
         return model.max_context_token
 
+    @classmethod
+    def get_model_sample_params(cls, mode_id):
+        registry = ModelRegistry.get_instance()
+        model = registry.get_model(mode_id)
+        if not model or not model.force_sample_params:
+            logger.warning(f"Model not found in registry: {mode_id}")
+            return None
+        return model.force_sample_params
+
     def _load_custom_models_from_config(self):
         """Loads models from custom LLM provider configurations and registers them."""
         try:
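
Callers guard the result with a single truthiness check, since the helper returns None both for unknown models and for models that define no forced params. A usage sketch, assuming the registry has been initialized and using the f"{provider}/{model}" id form built in the service modules:

    # Usage sketch; assumes the registry is initialized.
    from AgentCrew.modules.llm.model_registry import ModelRegistry

    forced = ModelRegistry.get_model_sample_params("google/gemini-3-pro-preview")
    if forced:  # None when the model is unknown or has nothing pinned
        print(forced.temperature)  # 1.0 for the Gemini 3 Pro entry above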

AgentCrew/modules/llm/types.py

Lines changed: 12 additions & 1 deletion
@@ -1,5 +1,15 @@
 from pydantic import BaseModel
-from typing import List, Literal
+from typing import List, Literal, Optional
+
+
+class SampleParam(BaseModel):
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    min_p: Optional[float] = None
+    top_k: Optional[int] = None
+    frequency_penalty: Optional[float] = None
+    presence_penalty: Optional[float] = None
+    repetition_penalty: Optional[float] = None
 
 
 class Model(BaseModel):
@@ -19,6 +29,7 @@ class Model(BaseModel):
         ]
     ]
     default: bool = False
+    force_sample_params: Optional[SampleParam] = None
     max_context_token: int = 128_000
     input_token_price_1m: float = 0.0
     output_token_price_1m: float = 0.0
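
Every field is optional, so a model can pin any subset of parameters. This is also why the services test each field with `is not None` rather than truthiness: 0.0 is a legitimate pinned value (min_p=0.0 in the constants above) but is falsy. A quick check, assuming Pydantic v2:

    # Why the per-field `is not None` checks matter (assumes Pydantic v2).
    from AgentCrew.modules.llm.types import SampleParam

    p = SampleParam(temperature=0.6, min_p=0.0)
    assert not p.min_p                 # falsy: a plain `if p.min_p:` would skip it
    assert p.min_p is not None         # the check the services actually use
    assert p.top_p is None             # unset fields stay None
    print(p.model_dump(exclude_none=True))  # {'temperature': 0.6, 'min_p': 0.0}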

AgentCrew/modules/openai/response_service.py

Lines changed: 19 additions & 11 deletions
@@ -225,8 +225,16 @@ async def stream_assistant_response(self, messages) -> Any:
             "input": input_data,
             "stream": True,
             "instructions": self.system_prompt or None,
+            "temperature": self.temperature,
         }
 
+        forced_sample_params = ModelRegistry.get_model_sample_params(full_model_id)
+        if forced_sample_params:
+            if forced_sample_params.temperature is not None:
+                stream_params["temperature"] = forced_sample_params.temperature
+            if forced_sample_params.top_p is not None:
+                stream_params["top_p"] = forced_sample_params.top_p
+
         # Add reasoning configuration for thinking models
         if "thinking" in ModelRegistry.get_model_capabilities(full_model_id):
             if self.reasoning_effort:
@@ -251,17 +259,17 @@ async def stream_assistant_response(self, messages) -> Any:
 
             stream_params["tools"] = all_tools
 
-        # if (
-        #     "structured_output" in ModelRegistry.get_model_capabilities(full_model_id)
-        #     and self.structured_output
-        # ):
-        #     from openai.types import ResponseFormatJSONSchema
-        #
-        #     stream_params["text"] = {
-        #         "format": ResponseFormatJSONSchema.model_validate(
-        #             {"type": "json_schema", "json_schema": self.structured_output}
-        #         )
-        #     }
+        if (
+            "structured_output" in ModelRegistry.get_model_capabilities(full_model_id)
+            and self.structured_output
+        ):
+            stream_params["text"] = {
+                "format": {
+                    "name": "default",
+                    "type": "json_schema",
+                    "json_schema": self.structured_output,
+                }
+            }
 
         return await self.client.responses.create(**stream_params)
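
Beyond the sampling overrides, this hunk also activates structured output for the Responses API, replacing the commented-out ResponseFormatJSONSchema attempt with a plain dict. For reference, this is the payload shape the new block builds when self.structured_output holds a JSON Schema (the schema below is illustrative only):

    # Payload shape produced by the rewritten structured-output block,
    # with an invented schema standing in for self.structured_output.
    structured_output = {
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
    }
    stream_params = {
        "text": {
            "format": {
                "name": "default",        # fixed label supplied by the commit
                "type": "json_schema",
                "json_schema": structured_output,
            }
        }
    }
    print(stream_params)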

AgentCrew/modules/openai/service.py

Lines changed: 15 additions & 0 deletions
@@ -196,13 +196,28 @@ async def stream_assistant_response(self, messages) -> Any:
             "stream_options": {"include_usage": True},
             "max_tokens": 20000,
         }
+
         if "thinking" in ModelRegistry.get_model_capabilities(full_model_id):
             stream_params.pop("max_tokens", None)
             if self.reasoning_effort:
                 stream_params["reasoning_effort"] = self.reasoning_effort
         else:
             stream_params["temperature"] = self.temperature
             stream_params["top_p"] = 0.95
+        forced_sample_params = ModelRegistry.get_model_sample_params(full_model_id)
+        if forced_sample_params:
+            if forced_sample_params.temperature is not None:
+                stream_params["temperature"] = forced_sample_params.temperature
+            if forced_sample_params.top_p is not None:
+                stream_params["top_p"] = forced_sample_params.top_p
+            if forced_sample_params.frequency_penalty is not None:
+                stream_params["frequency_penalty"] = (
+                    forced_sample_params.frequency_penalty
+                )
+            if forced_sample_params.presence_penalty is not None:
+                stream_params["presence_penalty"] = (
+                    forced_sample_params.presence_penalty
+                )
 
         # Add system message if provided
         if self.system_prompt:

docker/pyproject.docker.toml

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 [project]
 name = "agentcrew-ai"
-version = "0.8.2"
+version = "0.8.3"
 requires-python = ">=3.12"
 classifiers = [
     "Programming Language :: Python :: 3",
@@ -46,7 +46,7 @@ dependencies = [
     "xmltodict>=0.14.2",
     "jsonref>=1.1.0",
     "pychromedevtools>=0.3.3",
-    "html-to-markdown>=1.14.0",
+    "html-to-markdown>=1.14.0,<2",
     "pip-system-certs>=5.2",
     "loguru>=0.7.3",
     "jsonschema>=4.25.1",

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [project]
 name = "agentcrew-ai"
-version = "0.8.2"
+version = "0.8.3"
 requires-python = ">=3.12"
 classifiers = [
     "Programming Language :: Python :: 3",
