Skip to content

Commit 90b4e1f

Browse files
committed
feat: optional spras revision
1 parent dba2b45 commit 90b4e1f

File tree

2 files changed

+27
-5
lines changed

2 files changed

+27
-5
lines changed

spras/config/config.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,17 @@ def spras_revision() -> str:
5353
return hashlib.file_digest(f, 'sha256').hexdigest()[:8]
5454
except importlib.metadata.PackageNotFoundError as err:
5555
raise RuntimeError('spras is not an installed pip-module: did you forget to install SPRAS as a module?') from err
56-
def attach_spras_revision(label: str) -> str:
56+
57+
58+
def attach_spras_revision(osdf_immutable: bool, label: str) -> str:
    """
    Attaches the SPRAS revision to a label.
    This function signature may become more complex as specific labels get versioned.

    @param osdf_immutable: if falsy, the label is returned unchanged (this function
        then acts as the identity function on `label`).
    @param label: The label to attach the SPRAS revision to.
    @return: `label` unchanged, or `label` followed by `_` and the value of
        `spras_revision()` when `osdf_immutable` is set.
    """
    # Truthiness check rather than `is False`, so any falsy flag value
    # (not only the `False` singleton) leaves the label untouched.
    if not osdf_immutable:
        return label
    return f"{label}_{spras_revision()}"
5868

5969
# This will get called in the Snakefile, instantiating the singleton with the raw config
@@ -117,6 +127,8 @@ def __init__(self, raw_config: dict[str, Any]):
117127
self.analysis_include_ml_aggregate_algo = None
118128
# A Boolean specifying whether to run the evaluation per algorithm analysis
119129
self.analysis_include_evaluation_aggregate_algo = None
130+
# Specifies whether the files should be OSDF-immutable (i.e. the file names change when the file itself changes)
131+
self.osdf_immutable = parsed_raw_config.osdf_immutable
120132

121133
self.process_config(parsed_raw_config)
122134

@@ -148,9 +160,9 @@ def process_datasets(self, raw_config: RawConfig):
148160
# Convert to dicts to simplify the yaml logging
149161

150162
for dataset in raw_config.datasets:
151-
dataset.label = attach_spras_revision(dataset.label)
163+
dataset.label = attach_spras_revision(self.osdf_immutable, dataset.label)
152164
for gold_standard in raw_config.gold_standards:
153-
gold_standard.label = attach_spras_revision(gold_standard.label)
165+
gold_standard.label = attach_spras_revision(self.osdf_immutable, gold_standard.label)
154166

155167
for dataset in raw_config.datasets:
156168
label = dataset.label
@@ -165,11 +177,14 @@ def process_datasets(self, raw_config: RawConfig):
165177
dataset_labels = set(self.datasets.keys())
166178
gold_standard_dataset_labels = {dataset_label for value in self.gold_standards.values() for dataset_label in value['dataset_labels']}
167179
for label in gold_standard_dataset_labels:
168-
if attach_spras_revision(label) not in dataset_labels:
180+
if attach_spras_revision(self.osdf_immutable, label) not in dataset_labels:
169181
raise ValueError(f"Dataset label '{label}' provided in gold standards does not exist in the existing dataset labels.")
170182
# We attach the SPRAS revision to the individual dataset labels afterwards for a cleaner error message above.
171183
for key, gold_standard in self.gold_standards.items():
172-
self.gold_standards[key]["dataset_labels"] = map(attach_spras_revision, gold_standard["dataset_labels"])
184+
# Call attach_spras_revision explicitly for each label. Binding the flag with
# functools.partial(..., osdf_immutable=...) collides with the label that map()
# passes positionally into the same first parameter
# (TypeError: got multiple values for argument 'osdf_immutable').
# A list is also safe to iterate more than once, unlike a map iterator.
self.gold_standards[key]["dataset_labels"] = [
    attach_spras_revision(self.osdf_immutable, dataset_label)
    for dataset_label in gold_standard["dataset_labels"]
]
173188

174189
# Code snipped from Snakefile that may be useful for assigning default labels
175190
# dataset_labels = [dataset.get('label', f'dataset{index}') for index, dataset in enumerate(datasets)]

spras/config/schema.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,13 @@ class ReconstructionSettings(BaseModel):
101101

102102
class RawConfig(BaseModel):
103103
containers: ContainerSettings
104+
osdf_immutable: bool = False
105+
"""
106+
If enabled, this tags all files with their local file version.
107+
Most files do not have a specific version, and by default, this will be the hash of
108+
all the SPRAS files in the PyPA installation. This option will not work if SPRAS was not installed
109+
in a PyPA-compliant manner (PyPA-compliant installations include but are not limited to pip, poetry, uv, conda, pixi.)
110+
"""
104111

105112
hash_length: int = DEFAULT_HASH_LENGTH
106113
"The length of the hash used to identify a parameter combination"

0 commit comments

Comments
 (0)