Skip to content

Commit b3a7b49

Browse files
authored
Adjust openness and tool usage values (#70)
1 parent f12a714 commit b3a7b49

File tree

3 files changed

+31
-9
lines changed

3 files changed

+31
-9
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "agent-eval"
7-
version = "0.1.42"
7+
version = "0.1.43"
88
description = "Agent evaluation toolkit"
99
readme = "README.md"
1010
requires-python = ">=3.10"

src/agenteval/cli.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,16 @@
2020
from agenteval.leaderboard.schema_generator import load_dataset_features
2121

2222
from .cli_utils import AliasedChoice, generate_choice_help
23-
from .config import load_suite_config
23+
from .config import (
24+
OPENNESS_CLOSED_API_AVAILABLE,
25+
OPENNESS_CLOSED_UI_ONLY,
26+
OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS,
27+
OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS,
28+
TOOL_USAGE_CUSTOM_INTERFACE,
29+
TOOL_USAGE_FULLY_CUSTOM,
30+
TOOL_USAGE_STANDARD,
31+
load_suite_config,
32+
)
2433
from .io import atomic_write_file
2534
from .leaderboard.models import LeaderboardSubmission, Readme
2635
from .leaderboard.upload import (
@@ -39,15 +48,15 @@
3948
SUBMISSION_METADATA_FILENAME = "submission.json"
4049
SUMMARIES_PREFIX = "summaries"
4150
OPENNESS_MAPPING = {
42-
"c": "Closed",
43-
"api": "API Available",
44-
"os": "Open Source",
45-
"ow": "Open Source + Open Weights",
51+
"c": OPENNESS_CLOSED_UI_ONLY,
52+
"api": OPENNESS_CLOSED_API_AVAILABLE,
53+
"os": OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS,
54+
"ow": OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS,
4655
}
4756
TOOL_MAPPING = {
48-
"s": "Standard",
49-
"css": "Custom with Standard Search",
50-
"c": "Fully Custom",
57+
"s": TOOL_USAGE_STANDARD,
58+
"ci": TOOL_USAGE_CUSTOM_INTERFACE,
59+
"c": TOOL_USAGE_FULLY_CUSTOM,
5160
}
5261

5362

src/agenteval/config.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,19 @@
55
import yaml
66
from pydantic import BaseModel, ValidationError
77

8+
# NOTE: downstream code depends on these exact string values. For example,
9+
# asta-bench-leaderboard expects the openness and tool-usage fields of
10+
# results to hold either one of these values or a value from a specific
11+
# list of aliases. If you change a value here, check those consumers too.
12+
OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS = "Open source & open weights"
13+
OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS = "Open source & closed weights"
14+
OPENNESS_CLOSED_API_AVAILABLE = "Closed source & API available"
15+
OPENNESS_CLOSED_UI_ONLY = "Closed source & UI only"
16+
17+
TOOL_USAGE_STANDARD = "Standard"
18+
TOOL_USAGE_CUSTOM_INTERFACE = "Custom interface"
19+
TOOL_USAGE_FULLY_CUSTOM = "Fully custom"
20+
821

922
class WeightAdjustment(BaseModel):
1023
"""Weight adjustment for a specific tag-task combination."""

0 commit comments

Comments
 (0)