@@ -53,7 +53,17 @@ def spras_revision() -> str:
5353 return hashlib .file_digest (f , 'sha256' ).hexdigest ()[:8 ]
5454 except importlib .metadata .PackageNotFoundError as err :
5555 raise RuntimeError ('spras is not an installed pip-module: did you forget to install SPRAS as a module?' ) from err
56- def attach_spras_revision (label : str ) -> str :
56+
57+
def attach_spras_revision(osdf_immutable: bool, label: str) -> str:
    """
    Attaches the SPRAS revision to a label.
    This function signature may become more complex as specific labels get versioned.

    @param osdf_immutable: whether labels should be versioned. If falsy, `label`
        is returned unchanged (the function acts as the identity on `label`).
    @param label: The label to attach the SPRAS revision to.
    @return: `label` unchanged when `osdf_immutable` is falsy; otherwise `label`,
        an underscore, and the value returned by `spras_revision()`.
    """
    # Guard clause: when versioning is disabled the revision is never computed,
    # so `spras_revision()` is not called at all.
    if not osdf_immutable:
        return label
    return f"{label}_{spras_revision()}"
5868
5969# This will get called in the Snakefile, instantiating the singleton with the raw config
@@ -117,6 +127,8 @@ def __init__(self, raw_config: dict[str, Any]):
117127 self .analysis_include_ml_aggregate_algo = None
118128 # A Boolean specifying whether to run the evaluation per algorithm analysis
119129 self .analysis_include_evaluation_aggregate_algo = None
130+ # Specifies whether the files should be OSDF-immutable (i.e. the file names change when the file itself changes)
131+ self .osdf_immutable = parsed_raw_config .osdf_immutable
120132
121133 self .process_config (parsed_raw_config )
122134
@@ -148,9 +160,9 @@ def process_datasets(self, raw_config: RawConfig):
148160 # Convert to dicts to simplify the yaml logging
149161
150162 for dataset in raw_config .datasets :
151- dataset .label = attach_spras_revision (dataset .label )
163+ dataset .label = attach_spras_revision (self . osdf_immutable , dataset .label )
152164 for gold_standard in raw_config .gold_standards :
153- gold_standard .label = attach_spras_revision (gold_standard .label )
165+ gold_standard .label = attach_spras_revision (self . osdf_immutable , gold_standard .label )
154166
155167 for dataset in raw_config .datasets :
156168 label = dataset .label
@@ -165,11 +177,14 @@ def process_datasets(self, raw_config: RawConfig):
165177 dataset_labels = set (self .datasets .keys ())
166178 gold_standard_dataset_labels = {dataset_label for value in self .gold_standards .values () for dataset_label in value ['dataset_labels' ]}
167179 for label in gold_standard_dataset_labels :
168- if attach_spras_revision (label ) not in dataset_labels :
180+ if attach_spras_revision (self . osdf_immutable , label ) not in dataset_labels :
169181 raise ValueError (f"Dataset label '{ label } ' provided in gold standards does not exist in the existing dataset labels." )
170182 # We attach the SPRAS revision to the individual dataset labels afterwards for a cleaner error message above.
171183 for key , gold_standard in self .gold_standards .items ():
172- self .gold_standards [key ]["dataset_labels" ] = map (attach_spras_revision , gold_standard ["dataset_labels" ])
184+ self .gold_standards [key ]["dataset_labels" ] = map (
185+ functools .partial (attach_spras_revision , osdf_immutable = self .osdf_immutable ),
186+ gold_standard ["dataset_labels" ]
187+ )
173188
174189 # Code snipped from Snakefile that may be useful for assigning default labels
175190 # dataset_labels = [dataset.get('label', f'dataset{index}') for index, dataset in enumerate(datasets)]
0 commit comments