Fix experiment table in upconversion

dweindl · dweindl · commit d3dd841eb0bb · 2024-12-20T13:57:40.000+01:00
diff --git a/petab/v2/lint.py b/petab/v2/lint.py
@@ -228,11 +228,16 @@ class CheckValidPetabIdColumn(ValidationTask):
     """A task to check that a given column contains only valid PEtab IDs."""
 
     def __init__(
-        self, table_name: str, column_name: str, required_column: bool = True
+        self,
+        table_name: str,
+        column_name: str,
+        required_column: bool = True,
+        ignore_nan: bool = False,
     ):
         self.table_name = table_name
         self.column_name = column_name
         self.required_column = required_column
+        self.ignore_nan = ignore_nan
 
     def run(self, problem: Problem) -> ValidationIssue | None:
         df = getattr(problem, f"{self.table_name}_df")
@@ -248,7 +253,10 @@ def run(self, problem: Problem) -> ValidationIssue | None:
             return
 
         try:
-            check_ids(df[self.column_name].values, kind=self.column_name)
+            ids = df[self.column_name].values
+            if self.ignore_nan:
+                ids = ids[~pd.isna(ids)]
+            check_ids(ids, kind=self.column_name)
         except ValueError as e:
             return ValidationError(str(e))
 
@@ -308,21 +316,26 @@ def run(self, problem: Problem) -> ValidationIssue | None:
         except AssertionError as e:
             return ValidationError(str(e))
 
-        # TODO: introduce some option for validation partial vs full
+        # TODO: introduce some option for validation of partial vs full
         #  problem. if this is supposed to be a complete problem, a missing
         #  condition table should be an error if the measurement table refers
-        #  to conditions
-
-        # check that measured experiments
-        if problem.experiment_df is None:
-            return
-
+        #  to conditions, otherwise it should maximally be a warning
         used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values)
-        available_experiments = set(
-            problem.experiment_df[EXPERIMENT_ID].unique()
+        # handle default-experiment
+        used_experiments = set(
+            filter(
+                lambda x: not isinstance(x, float) or not np.isnan(x),
+                used_experiments,
+            )
+        )
+        # check that measured experiments exist
+        available_experiments = (
+            set(problem.experiment_df[EXPERIMENT_ID].unique())
+            if problem.experiment_df is not None
+            else set()
         )
         if missing_experiments := (used_experiments - available_experiments):
-            raise AssertionError(
+            return ValidationError(
                 "Measurement table references experiments that "
                 "are not specified in the experiments table: "
                 + str(missing_experiments)
@@ -826,7 +839,7 @@ def append_overrides(overrides):
     CheckMeasurementTable(),
     CheckConditionTable(),
     CheckExperimentTable(),
-    CheckValidPetabIdColumn("experiment", EXPERIMENT_ID),
+    CheckValidPetabIdColumn("experiment", EXPERIMENT_ID, ignore_nan=True),
     CheckValidPetabIdColumn("experiment", CONDITION_ID),
     CheckExperimentConditionsExist(),
     CheckObservableTable(),
diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py
@@ -63,6 +63,10 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
     if get_major_version(yaml_config) != 1:
         raise ValueError("PEtab problem is not version 1.")
     petab_problem = ProblemV1.from_yaml(yaml_file or yaml_config)
+    # get rid of conditionName column if present (unsupported in v2)
+    petab_problem.condition_df = petab_problem.condition_df.drop(
+        columns=[v1.C.CONDITION_NAME], errors="ignore"
+    )
     if v1.lint_problem(petab_problem):
         raise ValueError("Provided PEtab problem does not pass linting.")
 
@@ -72,8 +76,6 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
     # Write new YAML file
     output_dir = Path(output_dir)
     output_dir.mkdir(parents=True, exist_ok=True)
-    new_yaml_file = output_dir / Path(yaml_file).name
-    write_yaml(new_yaml_config, new_yaml_file)
 
     # Update tables
     # condition tables, observable tables, SBML files, parameter table:
@@ -104,6 +106,19 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
         def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
             if not sim_cond_id and not preeq_cond_id:
                 return ""
+            # check whether the conditions will exist in the v2 condition table
+            sim_cond_exists = (
+                petab_problem.condition_df.loc[sim_cond_id].notna().any()
+            )
+            preeq_cond_exists = (
+                preeq_cond_id
+                and petab_problem.condition_df.loc[preeq_cond_id].notna().any()
+            )
+            if not sim_cond_exists and not preeq_cond_exists:
+                # if we have only all-NaN conditions, we don't create a new
+                #  experiment
+                return ""
+
             if preeq_cond_id:
                 preeq_cond_id = f"{preeq_cond_id}_"
             exp_id = f"experiment__{preeq_cond_id}__{sim_cond_id}"
@@ -126,6 +141,8 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
             sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID]
             preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "")
             exp_id = create_experiment_id(sim_cond_id, preeq_cond_id)
+            if not exp_id:
+                continue
             if preeq_cond_id:
                 experiments.append(
                     {
@@ -167,8 +184,8 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
             if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns:
                 measurement_df[
                     v1.C.PREEQUILIBRATION_CONDITION_ID
-                ] = measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID].astype(
-                    str
+                ] = measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID].fillna(
+                    ""
                 )
             else:
                 measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = ""
@@ -209,6 +226,9 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
                 measurement_df, get_dest_path(measurement_file)
             )
 
+    new_yaml_file = output_dir / Path(yaml_file).name
+    write_yaml(new_yaml_config, new_yaml_file)
+
     # validate updated Problem
     validation_issues = v2.lint_problem(new_yaml_file)