1414
1515import copy as copy
1616import itertools as it
17- import os
1817import warnings
18+ from pathlib import Path
1919from typing import Any
2020
2121import numpy as np
2222import yaml
2323
2424from spras .config .container_schema import ProcessedContainerSettings
25- from spras .config .schema import RawConfig
26- from spras .util import NpHashEncoder , hash_params_sha1_base32
25+ from spras .config .schema import DatasetSchema , RawConfig
26+ from spras .util import LoosePathLike , NpHashEncoder , hash_params_sha1_base32
2727
2828config = None
2929
@@ -34,19 +34,7 @@ def init_global(config_dict):
3434
def init_from_file(filepath):
    """
    Initialize the module-level ``config`` from a config file.

    Loading and parsing are delegated to ``Config.from_file``; the global is
    only rebound once that call has returned successfully.

    @param filepath: path to the config file to load
    """
    global config
    loaded_config = Config.from_file(filepath)
    config = loaded_config
5038
5139
5240class Config :
@@ -64,7 +52,7 @@ def __init__(self, raw_config: dict[str, Any]):
6452 # Directory used for storing output
6553 self .out_dir = parsed_raw_config .reconstruction_settings .locations .reconstruction_dir
6654 # A dictionary to store configured datasets against which SPRAS will be run
67- self .datasets = None
55+ self .datasets : dict [ str , DatasetSchema ] = {}
6856 # A dictionary to store configured gold standard data against output of SPRAS runs
6957 self .gold_standards = None
7058 # The hash length SPRAS will use to identify parameter combinations.
@@ -81,6 +69,20 @@ def __init__(self, raw_config: dict[str, Any]):
8169
8270 self .process_config (parsed_raw_config )
8371
@classmethod
def from_file(cls, filepath: LoosePathLike) -> "Config":
    """
    Alternate constructor: build an instance from a YAML config file.

    @param filepath: path to the YAML config file; it is made absolute before
        being opened so that error messages show the full location
    @return: a new instance constructed from the parsed YAML contents
    @raises RuntimeError: if the file does not exist or is not valid YAML
    """
    # Handle opening the file and parsing the yaml
    filepath = Path(filepath).absolute()
    try:
        # Decode explicitly as UTF-8 so that parsing does not depend on the
        # platform's locale encoding (e.g. cp1252 on some Windows setups).
        with open(filepath, 'r', encoding='utf-8') as yaml_file:
            config_dict = yaml.safe_load(yaml_file)
    except FileNotFoundError as e:
        raise RuntimeError(f"Error: The specified config '{filepath}' could not be found.") from e
    except yaml.YAMLError as e:
        raise RuntimeError(f"Error: Failed to parse config '{filepath}'") from e

    return cls(config_dict)
85+
8486 def process_datasets (self , raw_config : RawConfig ):
8587 """
8688 Parse dataset information
@@ -93,12 +95,11 @@ def process_datasets(self, raw_config: RawConfig):
9395 # Currently assumes all datasets have a label and the labels are unique
9496 # When Snakemake parses the config file it loads the datasets as OrderedDicts not dicts
9597 # Each dataset entry is stored as-is under its label (it is no longer converted to a plain dict)
96- self .datasets = {}
9798 for dataset in raw_config .datasets :
9899 label = dataset .label
99100 if label .lower () in [key .lower () for key in self .datasets .keys ()]:
100101 raise ValueError (f"Datasets must have unique case-insensitive labels, but the label { label } appears at least twice." )
101- self .datasets [label ] = dict ( dataset )
102+ self .datasets [label ] = dataset
102103
103104 # parse gold standard information
104105 self .gold_standards = {gold_standard .label : dict (gold_standard ) for gold_standard in raw_config .gold_standards }
0 commit comments