1414
1515import copy as copy
1616import itertools as it
17- import os
1817import warnings
18+ from pathlib import Path
1919from typing import Any , Optional
2020
2121import numpy as np
2222import yaml
2323from pytimeparse import parse
2424
2525from spras .config .container_schema import ProcessedContainerSettings
26- from spras .config .schema import RawConfig
27- from spras .util import NpHashEncoder , hash_params_sha1_base32
26+ from spras .config .schema import DatasetSchema , RawConfig
27+ from spras .util import LoosePathLike , NpHashEncoder , hash_params_sha1_base32
2828
2929config = None
3030
@@ -35,19 +35,7 @@ def init_global(config_dict):
3535
def init_from_file(filepath):
    """
    Load the YAML config at `filepath` and install it as the module-level `config`.

    Opening and parsing the file is delegated to `Config.from_file`. If that call
    raises, the exception propagates and `config` keeps its previous value.
    """
    global config
    loaded = Config.from_file(filepath)
    config = loaded
5139
5240
5341class Config :
@@ -65,7 +53,7 @@ def __init__(self, raw_config: dict[str, Any]):
6553 # Directory used for storing output
6654 self .out_dir = parsed_raw_config .reconstruction_settings .locations .reconstruction_dir
6755 # A dictionary to store configured datasets against which SPRAS will be run
68- self .datasets = None
56+ self .datasets : dict [ str , DatasetSchema ] = {}
6957 # A dictionary to store configured gold standard data against output of SPRAS runs
7058 self .gold_standards = None
7159 # The hash length SPRAS will use to identify parameter combinations.
@@ -106,6 +94,20 @@ def __init__(self, raw_config: dict[str, Any]):
10694
10795 self .process_config (parsed_raw_config )
10896
@classmethod
def from_file(cls, filepath: LoosePathLike):
    """
    Alternate constructor: build an instance from a YAML config file on disk.

    @param filepath: path to the YAML config file. A relative path is made absolute
        against the current working directory before the file is opened, so error
        messages always show the full location.
    @return: a new instance of `cls` built from the parsed YAML content
    @raises RuntimeError: if the file does not exist or cannot be parsed as YAML
    """
    filepath = Path(filepath).absolute()
    try:
        # Decode explicitly as UTF-8 instead of relying on the platform's locale
        # encoding, which is not UTF-8 everywhere (e.g. cp1252 on Windows) and
        # would otherwise garble non-ASCII characters in the config.
        with open(filepath, 'r', encoding='utf-8') as yaml_file:
            config_dict = yaml.safe_load(yaml_file)
    except FileNotFoundError as e:
        raise RuntimeError(f"Error: The specified config '{filepath}' could not be found.") from e
    except yaml.YAMLError as e:
        raise RuntimeError(f"Error: Failed to parse config '{filepath}'") from e

    return cls(config_dict)
110+
109111 def process_datasets (self , raw_config : RawConfig ):
110112 """
111113 Parse dataset information
@@ -118,12 +120,11 @@ def process_datasets(self, raw_config: RawConfig):
118120 # Currently assumes all datasets have a label and the labels are unique
119121 # When Snakemake parses the config file it loads the datasets as OrderedDicts not dicts
120122 # Convert to dicts to simplify the yaml logging
121- self .datasets = {}
122123 for dataset in raw_config .datasets :
123124 label = dataset .label
124125 if label .lower () in [key .lower () for key in self .datasets .keys ()]:
125126 raise ValueError (f"Datasets must have unique case-insensitive labels, but the label { label } appears at least twice." )
126- self .datasets [label ] = dict ( dataset )
127+ self .datasets [label ] = dataset
127128
128129 # parse gold standard information
129130 self .gold_standards = {gold_standard .label : dict (gold_standard ) for gold_standard in raw_config .gold_standards }
0 commit comments