
Commit ff47706
add openrouter reasoning
1 parent: 9245e8a

9 files changed: +115 additions, -17 deletions

metagpt/configs/llm_config.py
Lines changed: 2 additions & 1 deletion

@@ -36,6 +36,7 @@ class LLMType(Enum):
     MISTRAL = "mistral"
     YI = "yi"  # lingyiwanwu
     OPENROUTER = "openrouter"
+    OPENROUTER_REASONING = "openrouter_reasoning"
     BEDROCK = "bedrock"
     ARK = "ark"  # https://www.volcengine.com/docs/82379/1263482#python-sdk
 
@@ -102,7 +103,7 @@ class LLMConfig(YamlModel):
 
     # reasoning / thinking switch
     reasoning: bool = False
-    reasoning_tokens: int = 4000  # reasoning budget tokens to generate, usually smaller than max_tokens
+    reasoning_max_token: int = 1024  # reasoning budget tokens to generate, usually smaller than max_token
 
     @field_validator("api_key")
     @classmethod
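
For reference, a hedged sketch of how the renamed knob would be set when building a config in code; the api_key and model values are placeholders, not taken from this commit:

    from metagpt.configs.llm_config import LLMConfig, LLMType

    # Hypothetical configuration; api_key and model are placeholder values.
    config = LLMConfig(
        api_type=LLMType.OPENROUTER_REASONING,
        model="anthropic/claude-3.7-sonnet:thinking",
        api_key="sk-or-...",
        reasoning=True,            # the reasoning / thinking switch above
        reasoning_max_token=1024,  # budget, usually smaller than max_token
    )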

metagpt/provider/__init__.py
Lines changed: 2 additions & 0 deletions

@@ -19,6 +19,7 @@
 from metagpt.provider.anthropic_api import AnthropicLLM
 from metagpt.provider.bedrock_api import BedrockLLM
 from metagpt.provider.ark_api import ArkLLM
+from metagpt.provider.openrouter_reasoning import OpenrouterReasoningLLM
 
 __all__ = [
     "GeminiLLM",
@@ -34,4 +35,5 @@
     "AnthropicLLM",
     "BedrockLLM",
     "ArkLLM",
+    "OpenrouterReasoningLLM",
 ]

metagpt/provider/anthropic_api.py
Lines changed: 1 addition & 1 deletion

@@ -34,7 +34,7 @@ def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
         kwargs["messages"] = messages[1:]
         kwargs["system"] = messages[0]["content"]  # set system prompt here
         if self.config.reasoning:
-            kwargs["thinking"] = {"type": "enabled", "budget_tokens": self.config.reasoning_tokens}
+            kwargs["thinking"] = {"type": "enabled", "budget_tokens": self.config.reasoning_max_token}
         return kwargs
 
     def _update_costs(self, usage: Usage, model: str = None, local_calc_usage: bool = True):
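
The rename feeds straight into Anthropic's extended-thinking parameter. A sketch of the kwargs that _const_kwargs would produce with reasoning enabled; the model and message values are illustrative assumptions, not from the diff:

    # Assumed shape of the final request kwargs (illustrative values):
    kwargs = {
        "model": "claude-3-7-sonnet-20250219",
        "system": "You are a helpful assistant.",         # from messages[0]["content"]
        "messages": [{"role": "user", "content": "hi"}],  # messages[1:]
        "max_tokens": 4096,
        "thinking": {"type": "enabled", "budget_tokens": 1024},  # config.reasoning_max_token
    }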

metagpt/provider/bedrock/base_provider.py
Lines changed: 2 additions & 2 deletions

@@ -7,9 +7,9 @@ class BaseBedrockProvider(ABC):
     # to handle different generation kwargs
     max_tokens_field_name = "max_tokens"
 
-    def __init__(self, reasoning: bool = False, reasoning_tokens: int = 4000):
+    def __init__(self, reasoning: bool = False, reasoning_max_token: int = 1024):
         self.reasoning = reasoning
-        self.reasoning_tokens = reasoning_tokens
+        self.reasoning_max_token = reasoning_max_token
 
     @abstractmethod
     def _get_completion_from_dict(self, rsp_dict: dict) -> str:

metagpt/provider/bedrock/bedrock_provider.py
Lines changed: 5 additions & 3 deletions

@@ -20,6 +20,8 @@ def _get_completion_from_dict(self, rsp_dict: dict) -> str:
 
 class AnthropicProvider(BaseBedrockProvider):
     # See https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+    # https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-37.html
+    # https://docs.aws.amazon.com/code-library/latest/ug/python_3_bedrock-runtime_code_examples.html#anthropic_claude
 
     def _split_system_user_messages(self, messages: list[dict]) -> Tuple[str, list[dict]]:
         system_messages = []
@@ -34,7 +36,7 @@ def _split_system_user_messages(self, messages: list[dict]) -> Tuple[str, list[dict]]:
     def get_request_body(self, messages: list[dict], generate_kwargs, *args, **kwargs) -> str:
         if self.reasoning:
             generate_kwargs["temperature"] = 1  # should be 1
-            generate_kwargs["thinking"] = {"type": "enabled", "budget_tokens": self.reasoning_tokens}
+            generate_kwargs["thinking"] = {"type": "enabled", "budget_tokens": self.reasoning_max_token}
 
         system_message, user_messages = self._split_system_user_messages(messages)
         body = json.dumps(
@@ -189,7 +191,7 @@ def get_choice_text_from_stream(self, event) -> Union[bool, str]:
     }
 
 
-def get_provider(model_id: str, reasoning: bool = False, reasoning_tokens: int = 4000):
+def get_provider(model_id: str, reasoning: bool = False, reasoning_max_token: int = 1024):
     arr = model_id.split(".")
     if len(arr) == 2:
         provider, model_name = arr  # meta, mistral, ...
@@ -208,4 +210,4 @@ def get_provider(model_id: str, reasoning: bool = False, reasoning_tokens: int =
     elif provider == "cohere":
         # distinguish between R/R+ and older models
         return PROVIDERS[provider](model_name)
-    return PROVIDERS[provider](reasoning=reasoning, reasoning_tokens=reasoning_tokens)
+    return PROVIDERS[provider](reasoning=reasoning, reasoning_max_token=reasoning_max_token)
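
A hedged example of resolving a provider through the updated factory; the Bedrock model id is an assumption, chosen so that model_id.split(".") yields a (provider, model) pair handled by the branch shown above:

    # Hypothetical call: returns an AnthropicProvider with thinking enabled.
    provider = get_provider(
        "anthropic.claude-3-7-sonnet-20250219-v1:0",
        reasoning=True,
        reasoning_max_token=1024,
    )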

metagpt/provider/bedrock_api.py
Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@ def __init__(self, config: LLMConfig):
         self.config = config
         self.__client = self.__init_client("bedrock-runtime")
         self.__provider = get_provider(
-            self.config.model, reasoning=self.config.reasoning, reasoning_tokens=self.config.reasoning_tokens
+            self.config.model, reasoning=self.config.reasoning, reasoning_max_token=self.config.reasoning_max_token
         )
         self.cost_manager = CostManager(token_costs=BEDROCK_TOKEN_COSTS)
         if self.config.model in NOT_SUPPORT_STREAM_MODELS:

metagpt/provider/general_api_base.py
Lines changed: 8 additions & 7 deletions

@@ -150,6 +150,14 @@ def response_ms(self) -> Optional[int]:
         h = self._headers.get("Openai-Processing-Ms")
         return None if h is None else round(float(h))
 
+    def decode_asjson(self) -> Optional[dict]:
+        bstr = self.data.strip()
+        if bstr.startswith(b"{") and bstr.endswith(b"}"):
+            bstr = bstr.decode("utf-8")
+        else:
+            bstr = parse_stream_helper(bstr)
+        return json.loads(bstr) if bstr else None
+
 
 def _build_api_url(url, query):
     scheme, netloc, path, base_query, fragment = urlsplit(url)
@@ -547,13 +555,6 @@ async def arequest_raw(
         }
         try:
             result = await session.request(**request_kwargs)
-            # log_info(
-            #     "LLM API response",
-            #     path=abs_url,
-            #     response_code=result.status,
-            #     processing_ms=result.headers.get("LLM-Processing-Ms"),
-            #     request_id=result.headers.get("X-Request-Id"),
-            # )
             return result
         except (aiohttp.ServerTimeoutError, asyncio.TimeoutError) as e:
             raise openai.APITimeoutError("Request timed out") from e
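
decode_asjson handles two payload shapes: a whole JSON object, and an SSE line that is first unwrapped by this module's parse_stream_helper. A sketch of the assumed behaviour, for illustration only:

    # Assumed inputs and results (self.data is the raw response bytes):
    # b'{"id": "x", "choices": []}'  -> parsed directly with json.loads -> dict
    # b'data: {"id": "x"}'           -> parse_stream_helper strips the "data: "
    #                                   framing, then json.loads -> dict
    # b'data: [DONE]'                -> parse_stream_helper yields nothing,
    #                                   so decode_asjson returns None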
metagpt/provider/openrouter_reasoning.py
Lines changed: 86 additions & 0 deletions

@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Desc :
+
+import json
+
+from metagpt.configs.llm_config import LLMConfig, LLMType
+from metagpt.const import USE_CONFIG_TIMEOUT
+from metagpt.logs import log_llm_stream
+from metagpt.provider.base_llm import BaseLLM
+from metagpt.provider.general_api_requestor import GeneralAPIRequestor, OpenAIResponse
+from metagpt.provider.llm_provider_registry import register_provider
+
+
+@register_provider([LLMType.OPENROUTER_REASONING])
+class OpenrouterReasoningLLM(BaseLLM):
+    def __init__(self, config: LLMConfig):
+        self.client = GeneralAPIRequestor(base_url=config.base_url)
+        self.config = config
+        self.model = self.config.model
+        self.http_method = "post"
+        self.base_url = "https://openrouter.ai/api/v1"
+        self.url_suffix = "/chat/completions"
+        self.headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.config.api_key}"}
+
+    def decode(self, response: OpenAIResponse) -> dict:
+        return json.loads(response.data.decode("utf-8"))
+
+    def _const_kwargs(
+        self, messages: list[dict], stream: bool = False, timeout=USE_CONFIG_TIMEOUT, **extra_kwargs
+    ) -> dict:
+        kwargs = {
+            "messages": messages,
+            "include_reasoning": True,
+            "max_tokens": self.config.max_token,
+            "temperature": self.config.temperature,
+            "model": self.model,
+            "stream": stream,
+        }
+        return kwargs
+
+    def get_choice_text(self, rsp: dict) -> str:
+        if "reasoning" in rsp["choices"][0]["message"]:
+            self.reasoning_content = rsp["choices"][0]["message"]["reasoning"]
+        return rsp["choices"][0]["message"]["content"]
+
+    async def _achat_completion(self, messages: list[dict], timeout: int = USE_CONFIG_TIMEOUT) -> dict:
+        payload = self._const_kwargs(messages)
+        resp, _, _ = await self.client.arequest(
+            url=self.url_suffix, method=self.http_method, params=payload, headers=self.headers  # empty
+        )
+        resp = resp.decode_asjson()
+        self._update_costs(resp["usage"], model=self.model)
+        return resp
+
+    async def acompletion(self, messages: list[dict], timeout=USE_CONFIG_TIMEOUT) -> dict:
+        return await self._achat_completion(messages, timeout=self.get_timeout(timeout))
+
+    async def _achat_completion_stream(self, messages: list[dict], timeout: int = USE_CONFIG_TIMEOUT) -> str:
+        self.headers["Content-Type"] = "text/event-stream"  # update header to adapt the client
+        payload = self._const_kwargs(messages, stream=True)
+        resp, _, _ = await self.client.arequest(
+            url=self.url_suffix, method=self.http_method, params=payload, headers=self.headers, stream=True  # empty
+        )
+        collected_content = []
+        collected_reasoning_content = []
+        usage = {}
+        async for chunk in resp:
+            chunk = chunk.decode_asjson()
+            if not chunk:
+                continue
+            delta = chunk["choices"][0]["delta"]
+            if "reasoning" in delta and delta["reasoning"]:
+                collected_reasoning_content.append(delta["reasoning"])
+            elif delta["content"]:
+                collected_content.append(delta["content"])
+                log_llm_stream(delta["content"])
+
+            usage = chunk.get("usage")
+
+        log_llm_stream("\n")
+        self._update_costs(usage, model=self.model)
+        full_content = "".join(collected_content)
+        if collected_reasoning_content:
+            self.reasoning_content = "".join(collected_reasoning_content)
+        return full_content
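
An end-to-end usage sketch for the new provider. The model id, api_key, and asyncio driver are assumptions; reasoning_content is the attribute the class sets when OpenRouter returns a reasoning field:

    import asyncio

    from metagpt.configs.llm_config import LLMConfig, LLMType
    from metagpt.provider.openrouter_reasoning import OpenrouterReasoningLLM

    async def main():
        config = LLMConfig(
            api_type=LLMType.OPENROUTER_REASONING,
            base_url="https://openrouter.ai/api/v1",
            api_key="sk-or-...",  # placeholder
            model="anthropic/claude-3.7-sonnet:thinking",  # assumed reasoning-capable model
        )
        llm = OpenrouterReasoningLLM(config)
        rsp = await llm.acompletion([{"role": "user", "content": "What is 1 + 1?"}])
        print(llm.get_choice_text(rsp))                 # final answer
        print(getattr(llm, "reasoning_content", None))  # reasoning trace, if returned

    asyncio.run(main())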

metagpt/utils/token_counter.py
Lines changed: 8 additions & 2 deletions

@@ -74,6 +74,7 @@
     "claude-3-5-sonnet-20240620": {"prompt": 0.003, "completion": 0.015},
     "claude-3-opus-20240229": {"prompt": 0.015, "completion": 0.075},
     "claude-3-haiku-20240307": {"prompt": 0.00025, "completion": 0.00125},
+    "claude-3-7-sonnet-20250219": {"prompt": 0.003, "completion": 0.015},
     "yi-34b-chat-0205": {"prompt": 0.0003, "completion": 0.0003},
     "yi-34b-chat-200k": {"prompt": 0.0017, "completion": 0.0017},
     "yi-large": {"prompt": 0.0028, "completion": 0.0028},
@@ -86,9 +87,14 @@
     "openai/o1-mini": {"prompt": 0.003, "completion": 0.012},
     "anthropic/claude-3-opus": {"prompt": 0.015, "completion": 0.075},
     "anthropic/claude-3.5-sonnet": {"prompt": 0.003, "completion": 0.015},
+    "anthropic/claude-3.7-sonnet": {"prompt": 0.003, "completion": 0.015},
+    "anthropic/claude-3.7-sonnet:beta": {"prompt": 0.003, "completion": 0.015},
+    "anthropic/claude-3.7-sonnet:thinking": {"prompt": 0.003, "completion": 0.015},
+    "us.anthropic.claude-3-7-sonnet-20250219-v1:0": {"prompt": 0.003, "completion": 0.015},
     "google/gemini-pro-1.5": {"prompt": 0.0025, "completion": 0.0075},  # for openrouter, end
-    "deepseek-chat": {"prompt": 0.00014, "completion": 0.00028},
-    "deepseek-coder": {"prompt": 0.00014, "completion": 0.00028},
+    "deepseek-chat": {"prompt": 0.00027, "completion": 0.0011},
+    "deepseek-coder": {"prompt": 0.00027, "completion": 0.0011},
+    "deepseek-reasoner": {"prompt": 0.00055, "completion": 0.0022},
     # For ark model https://www.volcengine.com/docs/82379/1099320
     "doubao-lite-4k-240515": {"prompt": 0.000043, "completion": 0.000086},
     "doubao-lite-32k-240515": {"prompt": 0.000043, "completion": 0.000086},
