|
4 | 4 | workflow_call: |
5 | 5 | inputs: |
6 | 6 | model_name: |
7 | | - description: 'Model to use (e.g., anthropic:claude-sonnet-4-5)' |
| 7 | + description: "Model to use (e.g., anthropic:claude-sonnet-4-5)" |
8 | 8 | required: false |
9 | 9 | type: string |
10 | 10 | thinking_level: |
11 | | - description: 'Thinking level (off, low, medium, high)' |
| 11 | + description: "Thinking level (off, low, medium, high)" |
12 | 12 | required: false |
13 | 13 | type: string |
14 | 14 | dataset: |
15 | | - description: 'Terminal-Bench dataset to use' |
| 15 | + description: "Terminal-Bench dataset to use" |
16 | 16 | required: false |
17 | 17 | type: string |
18 | | - default: 'terminal-bench-core==0.1.1' |
| 18 | + default: "terminal-bench-core==0.1.1" |
19 | 19 | concurrency: |
20 | | - description: 'Number of concurrent tasks (--n-concurrent)' |
| 20 | + description: "Number of concurrent tasks (--n-concurrent)" |
21 | 21 | required: false |
22 | 22 | type: string |
23 | | - default: '4' |
| 23 | + default: "4" |
24 | 24 | livestream: |
25 | | - description: 'Enable livestream mode' |
| 25 | + description: "Enable livestream mode" |
26 | 26 | required: false |
27 | 27 | type: boolean |
28 | 28 | default: true |
29 | 29 | sample_size: |
30 | | - description: 'Number of random tasks to run (empty = all tasks)' |
| 30 | + description: "Number of random tasks to run (empty = all tasks)" |
31 | 31 | required: false |
32 | 32 | type: string |
33 | 33 | extra_args: |
34 | | - description: 'Additional arguments to pass to terminal-bench' |
| 34 | + description: "Additional arguments to pass to terminal-bench" |
35 | 35 | required: false |
36 | 36 | type: string |
37 | 37 | secrets: |
|
42 | 42 | workflow_dispatch: |
43 | 43 | inputs: |
44 | 44 | dataset: |
45 | | - description: 'Terminal-Bench dataset to use' |
| 45 | + description: "Terminal-Bench dataset to use" |
46 | 46 | required: false |
47 | | - default: 'terminal-bench-core==0.1.1' |
| 47 | + default: "terminal-bench-core==0.1.1" |
48 | 48 | type: string |
49 | 49 | concurrency: |
50 | | - description: 'Number of concurrent tasks (--n-concurrent)' |
| 50 | + description: "Number of concurrent tasks (--n-concurrent)" |
51 | 51 | required: false |
52 | | - default: '4' |
| 52 | + default: "4" |
53 | 53 | type: string |
54 | 54 | livestream: |
55 | | - description: 'Enable livestream mode' |
| 55 | + description: "Enable livestream mode" |
56 | 56 | required: false |
57 | 57 | default: true |
58 | 58 | type: boolean |
59 | 59 | sample_size: |
60 | | - description: 'Number of random tasks to run (empty = all tasks)' |
| 60 | + description: "Number of random tasks to run (empty = all tasks)" |
61 | 61 | required: false |
62 | 62 | type: string |
63 | 63 | model_name: |
64 | | - description: 'Model to use (e.g., anthropic:claude-sonnet-4-5, openai:gpt-5-codex)' |
| 64 | + description: "Model to use (e.g., anthropic:claude-sonnet-4-5, openai:gpt-5-codex)" |
65 | 65 | required: false |
66 | 66 | type: string |
67 | 67 | thinking_level: |
68 | | - description: 'Thinking level (off, low, medium, high)' |
| 68 | + description: "Thinking level (off, low, medium, high)" |
69 | 69 | required: false |
70 | 70 | type: string |
71 | 71 | extra_args: |
72 | | - description: 'Additional arguments to pass to terminal-bench' |
| 72 | + description: "Additional arguments to pass to terminal-bench" |
73 | 73 | required: false |
74 | 74 | type: string |
75 | 75 |
|
@@ -148,4 +148,3 @@ jobs: |
148 | 148 | runs/ |
149 | 149 | if-no-files-found: warn |
150 | 150 | retention-days: 30 |
151 | | - |
0 commit comments