File tree Expand file tree Collapse file tree 13 files changed +427
-211
lines changed
Expand file tree Collapse file tree 13 files changed +427
-211
lines changed Original file line number Diff line number Diff line change @@ -120,6 +120,16 @@ def _load_full_config(
120120 "metadata" : {"config" : "unknown" }
121121 } # python task without YAML
122122
123+ # Handle task_list configs - merge base config with per-task overrides
124+ if "task_list" in cfg :
125+ task_list = cfg .pop ("task_list" )
126+ # Find the entry for this task in task_list
127+ for item in task_list :
128+ if isinstance (item , dict ) and item .get ("task" ) == entry .name :
129+ # Merge per-task overrides
130+ cfg = {** cfg , ** item }
131+ break
132+
123133 if overrides :
124134 cfg = {** cfg , ** overrides }
125135 cfg ["metadata" ] = (
Original file line number Diff line number Diff line change @@ -33,8 +33,7 @@ def __init__(
3333 self ._factory = TaskFactory (meta = metadata )
3434
3535 all_paths : list [Path ] = []
36- if include_defaults :
37- all_paths .append (Path (__file__ ).parent )
36+ # Process include_path FIRST so user tasks take precedence over defaults
3837 if include_path :
3938 all_paths += [
4039 Path (p )
@@ -44,6 +43,8 @@ def __init__(
4443 else [include_path ]
4544 )
4645 ]
46+ if include_defaults :
47+ all_paths .append (Path (__file__ ).parent )
4748
4849 self ._index = index .build (all_paths )
4950
Original file line number Diff line number Diff line change 1+ # Group configuration demonstrating a group whose member tasks are defined inline (zero-shot and two-shot variants)
2+
3+ group : test_group
4+ task :
5+ - task : group_task_fs0
6+ dataset_path : json
7+ dataset_kwargs :
8+ data_files :
9+ test : tests/test_configs/test_data.json
10+ output_type : multiple_choice
11+ doc_to_text : " {{question}}"
12+ doc_to_target : " {{choices[answer]}}"
13+ test_split : test
14+ num_fewshot : 0
15+ metric_list :
16+ - metric : acc
17+ aggregation : mean
18+ higher_is_better : true
19+ - task : group_task_fs2
20+ dataset_path : json
21+ dataset_kwargs :
22+ data_files :
23+ test : tests/test_configs/test_data.json
24+ output_type : multiple_choice
25+ doc_to_text : " {{question}}"
26+ doc_to_target : " {{choices[answer]}}"
27+ test_split : test
28+ num_fewshot : 2
29+ metric_list :
30+ - metric : acc
31+ aggregation : mean
32+ higher_is_better : true
Original file line number Diff line number Diff line change 1+ # Base configuration for include walkthrough tests
2+ # This will be included by other configs to demonstrate inheritance
3+
4+ task : base_task # This should be overridden by including configs
5+ dataset_path : json
6+ dataset_kwargs :
7+ data_files :
8+ test : tests/test_configs/test_data.json
9+ output_type : multiple_choice
10+ doc_to_text : " {{question}}"
11+ doc_to_target : " {{choices[answer]}}"
12+ test_split : test
13+ num_fewshot : 0 # Default, can be overridden
14+ metric_list :
15+ - metric : acc
16+ aggregation : mean
17+ higher_is_better : true
18+ metadata :
19+ version : 1.0
20+ description : " Base config for include demonstration"
Original file line number Diff line number Diff line change 1+ # Group with multiple tasks using include inheritance
2+ # Demonstrates tasks sharing the same base config
3+
4+ group : include_group
5+ task :
6+ - include_task_fs0
7+ - include_task_fs1
8+ - include_task_fs5
Original file line number Diff line number Diff line change 1+ # Task demonstrating include inheritance
2+
3+ task : include_task_fs0
4+ include : include_base.yaml
5+ num_fewshot : 0
6+ description : " Zero-shot with inheritance"
Original file line number Diff line number Diff line change 1+ # Task demonstrating include inheritance
2+
3+ task : include_task_fs1
4+ include : include_base.yaml
5+ num_fewshot : 1
6+ description : " One-shot with inheritance"
Original file line number Diff line number Diff line change 1+ # Task demonstrating include inheritance while redefining metric_list (adds acc_norm alongside the base acc metric)
2+
3+ task : include_task_fs5
4+ include : include_base.yaml
5+ num_fewshot : 5
6+ description : " Five-shot with custom metrics"
7+ metric_list :
8+ - metric : acc
9+ aggregation : mean
10+ higher_is_better : true
11+ - metric : acc_norm
12+ aggregation : mean
13+ higher_is_better : true
Original file line number Diff line number Diff line change 1+ # Simple task configuration for walkthrough tests
2+ # Demonstrates basic task loading without any special features
3+
4+ task : simple_task
5+ dataset_path : json
6+ dataset_kwargs :
7+ data_files :
8+ test : tests/test_configs/test_data.json
9+ output_type : multiple_choice
10+ doc_to_text : " {{question}}"
11+ doc_to_target : " {{choices[answer]}}"
12+ test_split : test
13+ num_fewshot : 1
14+ metric_list :
15+ - metric : acc
16+ aggregation : mean
17+ higher_is_better : true
18+ metadata :
19+ version : 1.0
20+ description : " Simple task for basic walkthrough"
Original file line number Diff line number Diff line change 1+ # Task list configuration for code walkthrough tests
2+ # This demonstrates the task_list feature with shared config and task-specific overrides
3+
4+ dataset_path : json
5+ dataset_kwargs :
6+ data_files :
7+ test : tests/test_configs/test_data.json
8+ output_type : multiple_choice
9+ doc_to_text : " {{question}}"
10+ doc_to_target : " {{choices[answer]}}"
11+ test_split : test
12+ metric_list :
13+ - metric : acc
14+ aggregation : mean
15+ higher_is_better : true
16+ metadata :
17+ version : 1.0
18+ description : " Task list walkthrough example"
19+
20+ task_list :
21+ - task : task_list_fs0
22+ num_fewshot : 0
23+ description : " Zero-shot variant"
24+ - task : task_list_fs1
25+ num_fewshot : 1
26+ description : " One-shot variant"
27+ - task : task_list_fs3
28+ num_fewshot : 3
29+ description : " Three-shot variant"
30+ metric_list :
31+ - metric : acc
32+ aggregation : mean
33+ higher_is_better : true
34+ - metric : acc_norm
35+ aggregation : mean
36+ higher_is_better : true
You can’t perform that action at this time.
0 commit comments