Skip to content

Commit 26c4010

Browse files
dweindl and dilpath authored
Simplification of PEtab problems (#172)
* Simplification of PEtab problems

Adds some functions to simplify / clean existing PEtab problems:

* removing unused conditions
* moving parameters that aren't really condition-dependent from the conditions table to the parameters table

Co-authored-by: Dilan Pathirana <[email protected]>
1 parent b6f234e commit 26c4010

File tree

3 files changed

+257
-0
lines changed

3 files changed

+257
-0
lines changed

doc/modules.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,6 @@ API Reference
2323
petab.sampling
2424
petab.sbml
2525
petab.simulate
26+
petab.simplify
2627
petab.visualize
2728
petab.yaml

petab/simplify.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
"""Functionality for simplifying PEtab problems"""
2+
from math import nan
3+
4+
import pandas as pd
5+
6+
import petab
7+
from . import Problem
8+
from .C import * # noqa: F403
9+
from .lint import lint_problem
10+
11+
__all__ = [
12+
"remove_nan_measurements",
13+
"remove_unused_observables",
14+
"remove_unused_conditions",
15+
"simplify_problem",
16+
"condition_parameters_to_parameter_table",
17+
]
18+
19+
20+
def remove_nan_measurements(problem: Problem):
    """Drop all measurement rows whose measurement value is NaN.

    The measurement table of ``problem`` is replaced by the filtered table,
    which gets a fresh consecutive integer index.
    """
    measurements = problem.measurement_df
    has_value = measurements[MEASUREMENT].notna()
    problem.measurement_df = measurements[has_value].reset_index(drop=True)
26+
27+
28+
def remove_unused_observables(problem: Problem):
    """Drop every observable that is not referenced by any measurement.

    The observable table of ``problem`` is replaced by the subset of rows
    whose index value occurs in the observable ID column of the measurement
    table.
    """
    referenced_ids = set(problem.measurement_df[OBSERVABLE_ID].unique())
    is_referenced = problem.observable_df.index.isin(referenced_ids)
    problem.observable_df = problem.observable_df[is_referenced]
33+
34+
35+
def remove_unused_conditions(problem: Problem):
    """Drop every condition that no measurement refers to.

    A condition counts as used if its ID occurs in the simulation condition
    column of the measurement table or, when that column is present, in the
    preequilibration condition column.
    """
    measurements = problem.measurement_df
    used_ids = set(measurements[SIMULATION_CONDITION_ID].unique())
    if PREEQUILIBRATION_CONDITION_ID in measurements:
        used_ids.update(measurements[PREEQUILIBRATION_CONDITION_ID].unique())

    is_used = problem.condition_df.index.isin(used_ids)
    problem.condition_df = problem.condition_df[is_used]
45+
46+
47+
def simplify_problem(problem: Problem):
    """Simplify a PEtab problem in place.

    Removes unused observables and unused conditions, then moves parameters
    that have the same value in all conditions from the condition table to
    the parameter table. ``lint_problem`` is run before and after.

    :raises ValueError:
        If ``lint_problem`` returns a truthy value for the supplied problem.
    :raises AssertionError:
        If ``lint_problem`` returns a truthy value for the simplified problem.
    """
    input_is_invalid = lint_problem(problem)
    if input_is_invalid:
        raise ValueError("Invalid PEtab problem supplied.")

    simplifications = (
        remove_unused_observables,
        remove_unused_conditions,
        condition_parameters_to_parameter_table,
    )
    for simplify in simplifications:
        simplify(problem)

    output_is_invalid = lint_problem(problem)
    if output_is_invalid:
        raise AssertionError("Invalid PEtab problem generated.")
57+
58+
59+
def condition_parameters_to_parameter_table(problem: Problem):
    """Move parameters from the condition table to the parameter table, if
    the same parameter value is used for all conditions.

    A condition-table column is moved if it is not the condition name column,
    is not a state variable of the model, and holds one single numeric value
    for all conditions. Each moved parameter becomes a non-estimated,
    linearly scaled row of the parameter table with that value as nominal
    value, and its column is removed from the condition table.

    Nothing is changed if the problem has no condition table, an empty
    condition table, or no model.
    """
    condition_df = problem.condition_df
    if condition_df is None or condition_df.empty or problem.model is None:
        return

    replacements = {}
    for parameter_id in condition_df:
        if parameter_id == CONDITION_NAME:
            continue

        if problem.model.is_state_variable(parameter_id):
            # initial states can't go to the parameter table
            continue

        series = condition_df[parameter_id]
        # Fetch the first entry by position. The condition table is indexed
        # by condition ID, so ``series[0]`` would be a label lookup that only
        # works through pandas' deprecated positional fallback.
        value = petab.to_float_if_float(series.iloc[0])

        # same value for all conditions and no parametric overrides (str)?
        if isinstance(value, float) and len(series.unique()) == 1:
            # store the converted float, so that the nominal value is numeric
            # even if the column held a numeric string
            replacements[parameter_id] = value

    if not replacements:
        return

    rows = pd.DataFrame(
        [
            {
                PARAMETER_ID: parameter_id,
                PARAMETER_SCALE: LIN,
                LOWER_BOUND: nan,
                UPPER_BOUND: nan,
                NOMINAL_VALUE: value,
                ESTIMATE: 0,
            }
            for parameter_id, value in replacements.items()
        ]
    )
    rows.set_index(PARAMETER_ID, inplace=True)

    if problem.parameter_df is None:
        problem.parameter_df = rows
    else:
        problem.parameter_df = pd.concat([problem.parameter_df, rows])

    problem.condition_df = condition_df.drop(columns=list(replacements))

tests/test_simplify.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
"""Tests for petab.simplify.*"""
2+
from math import nan
3+
4+
import pandas as pd
5+
import pytest
6+
import simplesbml
7+
from pandas.testing import *
8+
9+
from petab import Problem
10+
from petab.C import * # noqa: F403
11+
from petab.models.sbml_model import SbmlModel
12+
from petab.simplify import *
13+
14+
15+
@pytest.fixture
def problem() -> Problem:
    """Provide a small PEtab problem for the simplification tests.

    The problem has three observables and three conditions, of which one
    each is not referenced by the measurements, and a condition table column
    that holds the same value for all conditions. It has no parameter table.
    """
    sbml_builder = simplesbml.SbmlModel()
    for model_parameter in (
        "some_parameter",
        "same_value_for_all_conditions",
    ):
        sbml_builder.addParameter(model_parameter, val=1.0)

    observable_df = pd.DataFrame(
        {
            OBSERVABLE_ID: ["obs_used", "obs_unused", "obs_used_2"],
            OBSERVABLE_FORMULA: [1.0, 2.0, 3.0],
            NOISE_FORMULA: [1.0, 2.0, 3.0],
        }
    ).set_index(OBSERVABLE_ID)

    conditions_df = pd.DataFrame(
        {
            CONDITION_ID: [
                "condition_used_1",
                "condition_unused",
                "condition_used_2",
            ],
            "some_parameter": [1.0, 2.0, 3.0],
            "same_value_for_all_conditions": [4.0] * 3,
        }
    ).set_index(CONDITION_ID)

    measurement_df = pd.DataFrame(
        {
            OBSERVABLE_ID: ["obs_used", "obs_used_2", "obs_used"],
            MEASUREMENT: [1.0, 1.5, 2.0],
            SIMULATION_CONDITION_ID: [
                "condition_used_1",
                "condition_used_1",
                "condition_used_2",
            ],
            TIME: [1.0] * 3,
        }
    )

    petab_model = SbmlModel(sbml_model=sbml_builder.getModel())
    yield Problem(
        model=petab_model,
        condition_df=conditions_df,
        observable_df=observable_df,
        measurement_df=measurement_df,
    )
57+
58+
59+
def test_remove_nan_measurements(problem):
    """Rows with NaN measurements are dropped and the index is renumbered."""
    problem.measurement_df = pd.DataFrame(
        {
            OBSERVABLE_ID: ["obs_used", "obs_with_nan", "obs_used"],
            MEASUREMENT: [1.0, nan, 2.0],
            SIMULATION_CONDITION_ID: [
                "condition_used_1",
                "condition_used_1",
                "condition_used_2",
            ],
            TIME: [1.0] * 3,
        }
    )
    expected = pd.DataFrame(
        {
            OBSERVABLE_ID: ["obs_used"] * 2,
            MEASUREMENT: [1.0, 2.0],
            SIMULATION_CONDITION_ID: [
                "condition_used_1",
                "condition_used_2",
            ],
            TIME: [1.0] * 2,
        }
    )
    # make sure the function under test actually has something to do
    assert not problem.measurement_df.equals(expected)

    remove_nan_measurements(problem)

    assert_frame_equal(problem.measurement_df, expected)
85+
86+
87+
def test_remove_unused_observables(problem):
    """Observables without measurements are removed from the table."""
    expected = pd.DataFrame(
        {
            OBSERVABLE_ID: ["obs_used", "obs_used_2"],
            OBSERVABLE_FORMULA: [1.0, 3.0],
            NOISE_FORMULA: [1.0, 3.0],
        }
    ).set_index(OBSERVABLE_ID)
    # make sure the function under test actually has something to do
    assert not problem.observable_df.equals(expected)

    remove_unused_observables(problem)

    assert_frame_equal(problem.observable_df, expected)
101+
102+
103+
def test_remove_unused_conditions(problem):
    """Conditions without measurements are removed from the table."""
    expected = pd.DataFrame(
        {
            CONDITION_ID: ["condition_used_1", "condition_used_2"],
            "some_parameter": [1.0, 3.0],
            "same_value_for_all_conditions": [4.0] * 2,
        }
    ).set_index(CONDITION_ID)
    # make sure the function under test actually has something to do
    assert not problem.condition_df.equals(expected)

    remove_unused_conditions(problem)

    assert_frame_equal(problem.condition_df, expected)
118+
119+
120+
def test_condition_parameters_to_parameter_table(problem):
    """A column with one value for all conditions moves to the parameter
    table, while a column with differing values stays in the condition
    table."""
    expected_conditions = pd.DataFrame(
        {
            CONDITION_ID: [
                "condition_used_1",
                "condition_unused",
                "condition_used_2",
            ],
            "some_parameter": [1.0, 2.0, 3.0],
        }
    ).set_index(CONDITION_ID)
    expected_parameters = pd.DataFrame(
        {
            PARAMETER_ID: ["same_value_for_all_conditions"],
            PARAMETER_SCALE: [LIN],
            LOWER_BOUND: [nan],
            UPPER_BOUND: [nan],
            NOMINAL_VALUE: [4.0],
            ESTIMATE: [0],
        }
    ).set_index(PARAMETER_ID)

    # preconditions: the column is still there, and there is no parameter
    # table yet
    assert not problem.condition_df.equals(expected_conditions)
    assert problem.parameter_df is None

    condition_parameters_to_parameter_table(problem)

    assert_frame_equal(problem.condition_df, expected_conditions)
    assert_frame_equal(problem.parameter_df, expected_parameters)
147+
148+
149+
def test_simplify_problem(problem):
    """Simplifying the fixture problem runs through without an exception."""
    # No explicit assertion: simplify_problem lints its own result and
    # raises if the simplified problem is invalid.
    simplify_problem(problem)

0 commit comments

Comments
 (0)