1717import hashlib
1818import importlib .metadata
1919import itertools as it
20- import os
2120import subprocess
2221import tomllib
2322import warnings
2827import yaml
2928
3029from spras .config .container_schema import ProcessedContainerSettings
31- from spras .config .schema import RawConfig
32- from spras .util import NpHashEncoder , hash_params_sha1_base32
30+ from spras .config .schema import DatasetSchema , RawConfig
31+ from spras .util import LoosePathLike , NpHashEncoder , hash_params_sha1_base32
3332
3433config = None
3534
@@ -93,19 +92,7 @@ def init_global(config_dict):
9392
def init_from_file(filepath):
    """
    Load the YAML config at filepath and install it as the module-level config.

    Opening and parsing the file is delegated to Config.from_file, which raises
    RuntimeError when the file cannot be found or is not valid YAML.

    @param filepath: path to the YAML configuration file
    """
    global config
    config = Config.from_file(filepath)
10996
11097
11198class Config :
@@ -123,7 +110,7 @@ def __init__(self, raw_config: dict[str, Any]):
123110 # Directory used for storing output
124111 self .out_dir = parsed_raw_config .reconstruction_settings .locations .reconstruction_dir
125112 # A dictionary to store configured datasets against which SPRAS will be run
126- self .datasets = None
113+ self .datasets : dict [ str , DatasetSchema ] = {}
127114 # A dictionary to store configured gold standard data against output of SPRAS runs
128115 self .gold_standards = None
129116 # The hash length SPRAS will use to identify parameter combinations.
@@ -162,6 +149,20 @@ def __init__(self, raw_config: dict[str, Any]):
162149
163150 self .process_config (parsed_raw_config )
164151
@classmethod
def from_file(cls, filepath: LoosePathLike) -> "Config":
    """
    Alternate constructor: build a config object from a YAML file on disk.

    @param filepath: location of the YAML config file; it is made absolute
        before opening so that error messages name the full path
    @return: the instance produced by calling cls on the parsed YAML content
    @raises RuntimeError: if the file does not exist or cannot be parsed as YAML
    """
    filepath = Path(filepath).absolute()
    try:
        # Decode explicitly as UTF-8 rather than relying on the platform's
        # locale encoding, so the same config file parses identically everywhere.
        with open(filepath, 'r', encoding='utf-8') as yaml_file:
            config_dict = yaml.safe_load(yaml_file)
    except FileNotFoundError as e:
        raise RuntimeError(f"Error: The specified config '{filepath}' could not be found.") from e
    except yaml.YAMLError as e:
        raise RuntimeError(f"Error: Failed to parse config '{filepath}'") from e

    return cls(config_dict)
165+
165166 def process_datasets (self , raw_config : RawConfig ):
166167 """
167168 Parse dataset information
@@ -176,16 +177,14 @@ def process_datasets(self, raw_config: RawConfig):
176177 # Convert to dicts to simplify the yaml logging
177178
178179 for dataset in raw_config .datasets :
180+ label = dataset .label
181+ if label .lower () in [key .lower () for key in self .datasets .keys ()]:
182+ raise ValueError (f"Datasets must have unique case-insensitive labels, but the label { label } appears at least twice." )
179183 dataset .label = attach_spras_revision (dataset .label )
180184 for gold_standard in raw_config .gold_standards :
181185 gold_standard .label = attach_spras_revision (gold_standard .label )
182186
183187 self .datasets = {}
184- for dataset in raw_config .datasets :
185- label = dataset .label
186- if label .lower () in [key .lower () for key in self .datasets .keys ()]:
187- raise ValueError (f"Datasets must have unique case-insensitive labels, but the label { label } appears at least twice." )
188- self .datasets [label ] = dict (dataset )
189188
190189 # parse gold standard information
191190 self .gold_standards = {gold_standard .label : dict (gold_standard ) for gold_standard in raw_config .gold_standards }
0 commit comments