Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion results/glm-4.7/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"openness": "open_weights",
"country": "cn",
"tool_usage": "standard",
"submission_time": "2026-02-09T17:50:43.756380+00:00",
"submission_time": "2026-02-09T17:47:05.034520+00:00",
"directory_name": "glm-4.7",
"release_date": "2025-12-22",
"parameter_count_b": 355,
Expand Down
22 changes: 22 additions & 0 deletions results/glm-4.7/scores.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,28 @@
"agent_version": "v1.8.3",
"submission_time": "2026-01-30T12:51:32.483444+00:00"
},
{
"benchmark": "swe-bench-multimodal",
"score": 22.1,
"metric": "solveable_accuracy",
"cost_per_instance": 0.66,
"average_runtime": 1519.0,
"full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-openrouter-z-ai-glm-4-7/21674056150/results.tar.gz",
"tags": [
"swe-bench-multimodal"
],
"component_scores": {
"solveable_accuracy": 22.1,
"unsolveable_accuracy": 0.0,
"combined_accuracy": 14.7,
"solveable_resolved": 15,
"solveable_total": 68,
"unsolveable_resolved": 0,
"unsolveable_total": 34
},
"agent_version": "v1.10.0",
"submission_time": "2026-02-05T03:47:02+00:00"
},
{
"benchmark": "gaia",
"score": 53.9,
Expand Down