Skip to content

Commit e61d9c0

Browse files
authored
Merge branch 'main' into api-compliance-monitor
2 parents 770723a + caf1b0b commit e61d9c0

File tree

36 files changed

+1112
-332
lines changed

36 files changed

+1112
-332
lines changed

.agents/skills/custom-codereview-guide.md

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,27 @@ You are an expert code reviewer for the **OpenHands/software-agent-sdk** reposit
1313

1414
You have permission to **APPROVE** or **COMMENT** on PRs. Do not use REQUEST_CHANGES.
1515

16-
**Default to APPROVE**: If your review finds no issues at "important" level or higher, approve the PR. Minor suggestions or nitpicks alone are not sufficient reason to withhold approval.
16+
### Review decision policy (eval / benchmark risk)
1717

18-
**IMPORTANT: If you determine a PR is worth merging, you should approve it.** Don’t just say a PR is "worth merging" or "ready to merge" without actually submitting an approval. Your words and actions should be consistent.
18+
Do **NOT** submit an **APPROVE** review when the PR changes agent behavior or anything
19+
that could plausibly affect benchmark/evaluation performance.
20+
21+
Examples include: prompt templates, tool calling/execution, planning/loop logic,
22+
memory/condenser behavior, terminal/stdin/stdout handling, or evaluation harness code.
23+
24+
If a PR is in this category (or you are uncertain whether it is), leave a **COMMENT** review and
25+
explicitly flag it for a human maintainer to decide after running lightweight evals.
26+
27+
### Default approval policy
28+
29+
**Default to APPROVE**: If the PR is not in the eval-risk category above and your review finds no issues at "important" level or higher,
30+
approve the PR. Minor suggestions or nitpicks alone are not sufficient reason to
31+
withhold approval.
32+
33+
**IMPORTANT:** If you determine a PR is worth merging **and it is not in the eval-risk
34+
category above**, you should approve it. Don’t just say a PR is "worth merging" or
35+
"ready to merge" without actually submitting an approval. Your words and actions should
36+
be consistent.
1937

2038
### When to APPROVE
2139

.github/run-eval/resolve_model_config.py

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,10 @@
3737
"kimi-k2-thinking": {
3838
"id": "kimi-k2-thinking",
3939
"display_name": "Kimi K2 Thinking",
40-
"llm_config": {"model": "litellm_proxy/moonshot/kimi-k2-thinking"},
40+
"llm_config": {
41+
"model": "litellm_proxy/moonshot/kimi-k2-thinking",
42+
"temperature": 1.0,
43+
},
4144
},
4245
# https://www.kimi.com/blog/kimi-k2-5.html
4346
"kimi-k2.5": {
@@ -93,17 +96,26 @@
9396
"gemini-3-pro": {
9497
"id": "gemini-3-pro",
9598
"display_name": "Gemini 3 Pro",
96-
"llm_config": {"model": "litellm_proxy/gemini-3-pro-preview"},
99+
"llm_config": {
100+
"model": "litellm_proxy/gemini-3-pro-preview",
101+
"temperature": 0.0,
102+
},
97103
},
98104
"gemini-3-flash": {
99105
"id": "gemini-3-flash",
100106
"display_name": "Gemini 3 Flash",
101-
"llm_config": {"model": "litellm_proxy/gemini-3-flash-preview"},
107+
"llm_config": {
108+
"model": "litellm_proxy/gemini-3-flash-preview",
109+
"temperature": 0.0,
110+
},
102111
},
103112
"gemini-3.1-pro": {
104113
"id": "gemini-3.1-pro",
105114
"display_name": "Gemini 3.1 Pro",
106-
"llm_config": {"model": "litellm_proxy/gemini-3.1-pro-preview"},
115+
"llm_config": {
116+
"model": "litellm_proxy/gemini-3.1-pro-preview",
117+
"temperature": 0.0,
118+
},
107119
},
108120
"gpt-5.2": {
109121
"id": "gpt-5.2",
@@ -126,7 +138,10 @@
126138
"minimax-m2": {
127139
"id": "minimax-m2",
128140
"display_name": "MiniMax M2",
129-
"llm_config": {"model": "litellm_proxy/minimax/minimax-m2"},
141+
"llm_config": {
142+
"model": "litellm_proxy/minimax/minimax-m2",
143+
"temperature": 0.0,
144+
},
130145
},
131146
"minimax-m2.5": {
132147
"id": "minimax-m2.5",
@@ -140,7 +155,10 @@
140155
"minimax-m2.1": {
141156
"id": "minimax-m2.1",
142157
"display_name": "MiniMax M2.1",
143-
"llm_config": {"model": "litellm_proxy/minimax/MiniMax-M2.1"},
158+
"llm_config": {
159+
"model": "litellm_proxy/minimax/MiniMax-M2.1",
160+
"temperature": 0.0,
161+
},
144162
},
145163
"deepseek-v3.2-reasoner": {
146164
"id": "deepseek-v3.2-reasoner",
@@ -151,7 +169,8 @@
151169
"id": "qwen-3-coder",
152170
"display_name": "Qwen 3 Coder",
153171
"llm_config": {
154-
"model": "litellm_proxy/fireworks_ai/qwen3-coder-480b-a35b-instruct"
172+
"model": "litellm_proxy/fireworks_ai/qwen3-coder-480b-a35b-instruct",
173+
"temperature": 0.0,
155174
},
156175
},
157176
"nemotron-3-nano-30b": {
@@ -167,6 +186,7 @@
167186
"display_name": "GLM-4.7",
168187
"llm_config": {
169188
"model": "litellm_proxy/openrouter/z-ai/glm-4.7",
189+
"temperature": 0.0,
170190
# OpenRouter glm-4.7 is text-only despite LiteLLM reporting vision support
171191
"disable_vision": True,
172192
},
@@ -176,24 +196,34 @@
176196
"display_name": "GLM-5",
177197
"llm_config": {
178198
"model": "litellm_proxy/openrouter/z-ai/glm-5",
199+
"temperature": 0.0,
179200
# OpenRouter glm-5 is text-only despite LiteLLM reporting vision support
180201
"disable_vision": True,
181202
},
182203
},
183204
"qwen3-coder-next": {
184205
"id": "qwen3-coder-next",
185206
"display_name": "Qwen3 Coder Next",
186-
"llm_config": {"model": "litellm_proxy/openrouter/qwen/qwen3-coder-next"},
207+
"llm_config": {
208+
"model": "litellm_proxy/openrouter/qwen/qwen3-coder-next",
209+
"temperature": 0.0,
210+
},
187211
},
188212
"qwen3-coder-30b-a3b-instruct": {
189213
"id": "qwen3-coder-30b-a3b-instruct",
190214
"display_name": "Qwen3 Coder 30B A3B Instruct",
191-
"llm_config": {"model": "litellm_proxy/Qwen3-Coder-30B-A3B-Instruct"},
215+
"llm_config": {
216+
"model": "litellm_proxy/Qwen3-Coder-30B-A3B-Instruct",
217+
"temperature": 0.0,
218+
},
192219
},
193220
"gpt-oss-20b": {
194221
"id": "gpt-oss-20b",
195222
"display_name": "GPT OSS 20B",
196-
"llm_config": {"model": "litellm_proxy/gpt-oss-20b"},
223+
"llm_config": {
224+
"model": "litellm_proxy/gpt-oss-20b",
225+
"temperature": 0.0,
226+
},
197227
},
198228
}
199229

0 commit comments

Comments
 (0)