1414
1515import copy as copy
1616import itertools as it
17- import os
1817import warnings
18+ from pathlib import Path
1919from typing import Any
2020
2121import numpy as np
2222import yaml
2323
2424from spras .config .container_schema import ProcessedContainerSettings
25- from spras .config .schema import RawConfig
26- from spras .util import NpHashEncoder , hash_params_sha1_base32
25+ from spras .config .schema import DatasetSchema , RawConfig
26+ from spras .util import LoosePathLike , NpHashEncoder , hash_params_sha1_base32
2727
2828config = None
2929
@@ -34,19 +34,7 @@ def init_global(config_dict):
3434
def init_from_file(filepath):
    """
    Load the YAML config at `filepath` and store it in the module-level `config`.

    Opening and parsing the file is delegated to `Config.from_file`, which raises
    RuntimeError when the file cannot be found or is not valid YAML.
    """
    global config
    config = Config.from_file(filepath)
5038
5139
5240class Config :
@@ -64,7 +52,7 @@ def __init__(self, raw_config: dict[str, Any]):
6452 # Directory used for storing output
6553 self .out_dir = parsed_raw_config .reconstruction_settings .locations .reconstruction_dir
6654 # A dictionary to store configured datasets against which SPRAS will be run
67- self .datasets = None
55+ self .datasets : dict [ str , DatasetSchema ] = {}
6856 # A dictionary to store configured gold standard data against output of SPRAS runs
6957 self .gold_standards = None
7058 # The hash length SPRAS will use to identify parameter combinations.
@@ -103,6 +91,20 @@ def __init__(self, raw_config: dict[str, Any]):
10391
10492 self .process_config (parsed_raw_config )
10593
@classmethod
def from_file(cls, filepath: LoosePathLike) -> "Config":
    """
    Alternate constructor: build an instance from a YAML file on disk.

    The path is made absolute first so that error messages name the exact
    file that was attempted. The parsed YAML document is passed unchanged
    to the class constructor.

    Raises RuntimeError if the file does not exist or if its contents are
    not valid YAML; the underlying exception is chained as the cause.
    """
    # Handle opening the file and parsing the yaml
    filepath = Path(filepath).absolute()
    try:
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, which varies between machines.
        with open(filepath, 'r', encoding='utf-8') as yaml_file:
            config_dict = yaml.safe_load(yaml_file)
    except FileNotFoundError as e:
        raise RuntimeError(f"Error: The specified config '{filepath}' could not be found.") from e
    except yaml.YAMLError as e:
        raise RuntimeError(f"Error: Failed to parse config '{filepath}'") from e

    return cls(config_dict)
107+
106108 def process_datasets (self , raw_config : RawConfig ):
107109 """
108110 Parse dataset information
@@ -115,12 +117,11 @@ def process_datasets(self, raw_config: RawConfig):
115117 # Currently assumes all datasets have a label and the labels are unique
116118 # When Snakemake parses the config file it loads the datasets as OrderedDicts not dicts
117119 # Convert to dicts to simplify the yaml logging
118- self .datasets = {}
119120 for dataset in raw_config .datasets :
120121 label = dataset .label
121122 if label .lower () in [key .lower () for key in self .datasets .keys ()]:
122123 raise ValueError (f"Datasets must have unique case-insensitive labels, but the label { label } appears at least twice." )
123- self .datasets [label ] = dict ( dataset )
124+ self .datasets [label ] = dataset
124125
125126 # parse gold standard information
126127 self .gold_standards = {gold_standard .label : dict (gold_standard ) for gold_standard in raw_config .gold_standards }
0 commit comments