Skip to content

Commit ec58463

Browse files
authored
v2: fixes to validation and upconversion (#351)
* Don't create unnecessary experiment tables
* Write the YAML file *after* updating the config dict
* Check for missing experiments
* Handle anonymous experiments
1 parent 6e762c6 commit ec58463

File tree

2 files changed

+57
-29
lines changed

2 files changed

+57
-29
lines changed

petab/v2/lint.py

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -228,11 +228,16 @@ class CheckValidPetabIdColumn(ValidationTask):
228228
"""A task to check that a given column contains only valid PEtab IDs."""
229229

230230
def __init__(
231-
self, table_name: str, column_name: str, required_column: bool = True
231+
self,
232+
table_name: str,
233+
column_name: str,
234+
required_column: bool = True,
235+
ignore_nan: bool = False,
232236
):
233237
self.table_name = table_name
234238
self.column_name = column_name
235239
self.required_column = required_column
240+
self.ignore_nan = ignore_nan
236241

237242
def run(self, problem: Problem) -> ValidationIssue | None:
238243
df = getattr(problem, f"{self.table_name}_df")
@@ -248,7 +253,10 @@ def run(self, problem: Problem) -> ValidationIssue | None:
248253
return
249254

250255
try:
251-
check_ids(df[self.column_name].values, kind=self.column_name)
256+
ids = df[self.column_name].values
257+
if self.ignore_nan:
258+
ids = ids[~pd.isna(ids)]
259+
check_ids(ids, kind=self.column_name)
252260
except ValueError as e:
253261
return ValidationError(str(e))
254262

@@ -308,21 +316,26 @@ def run(self, problem: Problem) -> ValidationIssue | None:
308316
except AssertionError as e:
309317
return ValidationError(str(e))
310318

311-
# TODO: introduce some option for validation partial vs full
319+
# TODO: introduce some option for validation of partial vs full
312320
# problem. if this is supposed to be a complete problem, a missing
313321
# condition table should be an error if the measurement table refers
314-
# to conditions
315-
316-
# check that measured experiments
317-
if problem.experiment_df is None:
318-
return
319-
322+
# to conditions; otherwise it should be at most a warning
320323
used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values)
321-
available_experiments = set(
322-
problem.experiment_df[EXPERIMENT_ID].unique()
324+
# handle default-experiment
325+
used_experiments = set(
326+
filter(
327+
lambda x: not pd.isna(x),
328+
used_experiments,
329+
)
330+
)
331+
# check that measured experiments exist
332+
available_experiments = (
333+
set(problem.experiment_df[EXPERIMENT_ID].unique())
334+
if problem.experiment_df is not None
335+
else set()
323336
)
324337
if missing_experiments := (used_experiments - available_experiments):
325-
raise AssertionError(
338+
return ValidationError(
326339
"Measurement table references experiments that "
327340
"are not specified in the experiments table: "
328341
+ str(missing_experiments)
@@ -826,6 +839,7 @@ def append_overrides(overrides):
826839
CheckMeasurementTable(),
827840
CheckConditionTable(),
828841
CheckExperimentTable(),
842+
CheckValidPetabIdColumn("measurement", EXPERIMENT_ID, ignore_nan=True),
829843
CheckValidPetabIdColumn("experiment", EXPERIMENT_ID),
830844
CheckValidPetabIdColumn("experiment", CONDITION_ID),
831845
CheckExperimentConditionsExist(),

petab/v2/petab1to2.py

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,11 @@
99
import pandas as pd
1010
from pandas.io.common import get_handle, is_url
1111

12-
import petab.v1.C
13-
from petab.models import MODEL_TYPE_SBML
14-
from petab.v1 import Problem as ProblemV1
15-
from petab.yaml import get_path_prefix
16-
1712
from .. import v1, v2
18-
from ..v1.yaml import load_yaml, validate, write_yaml
13+
from ..v1 import Problem as ProblemV1
14+
from ..v1.yaml import get_path_prefix, load_yaml, validate, write_yaml
1915
from ..versions import get_major_version
16+
from .models import MODEL_TYPE_SBML
2017

2118
__all__ = ["petab1to2"]
2219

@@ -63,18 +60,18 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
6360
if get_major_version(yaml_config) != 1:
6461
raise ValueError("PEtab problem is not version 1.")
6562
petab_problem = ProblemV1.from_yaml(yaml_file or yaml_config)
63+
# get rid of conditionName column if present (unsupported in v2)
64+
petab_problem.condition_df = petab_problem.condition_df.drop(
65+
columns=[v1.C.CONDITION_NAME], errors="ignore"
66+
)
6667
if v1.lint_problem(petab_problem):
6768
raise ValueError("Provided PEtab problem does not pass linting.")
6869

70+
output_dir = Path(output_dir)
71+
6972
# Update YAML file
7073
new_yaml_config = _update_yaml(yaml_config)
7174

72-
# Write new YAML file
73-
output_dir = Path(output_dir)
74-
output_dir.mkdir(parents=True, exist_ok=True)
75-
new_yaml_file = output_dir / Path(yaml_file).name
76-
write_yaml(new_yaml_config, new_yaml_file)
77-
7875
# Update tables
7976
# condition tables, observable tables, SBML files, parameter table:
8077
# no changes - just copy
@@ -104,6 +101,19 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
104101
def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
105102
if not sim_cond_id and not preeq_cond_id:
106103
return ""
104+
# check whether the conditions will exist in the v2 condition table
105+
sim_cond_exists = (
106+
petab_problem.condition_df.loc[sim_cond_id].notna().any()
107+
)
108+
preeq_cond_exists = (
109+
preeq_cond_id
110+
and petab_problem.condition_df.loc[preeq_cond_id].notna().any()
111+
)
112+
if not sim_cond_exists and not preeq_cond_exists:
113+
# if we have only all-NaN conditions, we don't create a new
114+
# experiment
115+
return ""
116+
107117
if preeq_cond_id:
108118
preeq_cond_id = f"{preeq_cond_id}_"
109119
exp_id = f"experiment__{preeq_cond_id}__{sim_cond_id}"
@@ -126,6 +136,8 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
126136
sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID]
127137
preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "")
128138
exp_id = create_experiment_id(sim_cond_id, preeq_cond_id)
139+
if not exp_id:
140+
continue
129141
if preeq_cond_id:
130142
experiments.append(
131143
{
@@ -165,10 +177,8 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
165177
# add pre-eq condition id if not present or convert to string
166178
# for simplicity
167179
if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns:
168-
measurement_df[
169-
v1.C.PREEQUILIBRATION_CONDITION_ID
170-
] = measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID].astype(
171-
str
180+
measurement_df.fillna(
181+
{v1.C.PREEQUILIBRATION_CONDITION_ID: ""}, inplace=True
172182
)
173183
else:
174184
measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = ""
@@ -177,7 +187,7 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
177187
petab_problem.condition_df is not None
178188
and len(
179189
set(petab_problem.condition_df.columns)
180-
- {petab.v1.C.CONDITION_NAME}
190+
- {v1.C.CONDITION_NAME}
181191
)
182192
== 0
183193
):
@@ -209,6 +219,10 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
209219
measurement_df, get_dest_path(measurement_file)
210220
)
211221

222+
# Write new YAML file
223+
new_yaml_file = output_dir / Path(yaml_file).name
224+
write_yaml(new_yaml_config, new_yaml_file)
225+
212226
# validate updated Problem
213227
validation_issues = v2.lint_problem(new_yaml_file)
214228

0 commit comments

Comments
 (0)