Skip to content

Commit cf82b88

Browse files
dweindl and dilpath authored
Function for replacing objectivePrior* by observables/measurements (#309)
Reformulate the given PEtab problem such that the objective priors are converted to measurements. This is done by adding a new observable for each prior and adding a corresponding measurement to the measurement table. The resulting optimization problem will be equivalent to the original problem. This is meant to be used for tools that do not support priors. Closes #307 --------- Co-authored-by: Dilan Pathirana <[email protected]>
1 parent fdd9d95 commit cf82b88

File tree

3 files changed

+341
-1
lines changed

3 files changed

+341
-1
lines changed

petab/v1/parameters.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@ def get_priors_from_df(
458458
# get types and parameters of priors from dataframe
459459
par_to_estimate = parameter_df.loc[parameter_df[ESTIMATE] == 1]
460460

461-
if parameter_ids:
461+
if parameter_ids is not None:
462462
try:
463463
par_to_estimate = par_to_estimate.loc[parameter_ids, :]
464464
except KeyError as e:

petab/v1/priors.py

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
"""Functions related to prior handling."""
2+
import copy
3+
4+
import numpy as np
5+
import pandas as pd
6+
7+
from ..v2.C import PREEQUILIBRATION_CONDITION_ID
8+
from . import (
9+
ESTIMATE,
10+
LAPLACE,
11+
LIN,
12+
LOG,
13+
LOG10,
14+
LOG_LAPLACE,
15+
LOG_NORMAL,
16+
MEASUREMENT,
17+
NOISE_DISTRIBUTION,
18+
NOISE_FORMULA,
19+
NOISE_PARAMETERS,
20+
NORMAL,
21+
OBJECTIVE_PRIOR_PARAMETERS,
22+
OBJECTIVE_PRIOR_TYPE,
23+
OBSERVABLE_FORMULA,
24+
OBSERVABLE_ID,
25+
OBSERVABLE_TRANSFORMATION,
26+
PARAMETER_SCALE,
27+
PARAMETER_SCALE_LAPLACE,
28+
PARAMETER_SCALE_NORMAL,
29+
PARAMETER_SEPARATOR,
30+
SIMULATION_CONDITION_ID,
31+
TIME,
32+
Problem,
33+
)
34+
35+
36+
def priors_to_measurements(problem: Problem):
    """Convert priors to measurements.

    Reformulate the given problem such that the objective priors are converted
    to measurements. This is done by adding a new observable
    ``prior_{parameter_id}`` for each estimated parameter that has an objective
    prior, and adding a corresponding measurement to the measurement table.
    The new measurement is the prior distribution itself. The resulting
    optimization problem will be equivalent to the original problem.
    This is meant to be used for tools that do not support priors.

    The conversion involves the probability density function (PDF) of the
    prior, the parameters (e.g., location and scale) of that prior PDF, and the
    scale and value of the estimated parameter. Currently, `uniform` priors are
    not supported by this method. This method creates observables with:

    - `observableFormula`: the parameter value on the `parameterScale`
      (for `parameterScale*` priors; the unscaled parameter otherwise)
    - `observableTransformation`: `log` for `logNormal`/`logLaplace`
      distributions, `lin` otherwise
    - `noiseDistribution`: `normal` or `laplace`, matching the prior family

    and measurements with:

    - `measurement`: the PDF location
    - `noiseFormula`: the PDF scale

    The new measurements reuse the time point and condition(s) of the first
    row of the measurement table. The converted priors are removed from the
    parameter table of the returned problem. The given problem is not
    modified.

    Arguments
    ---------
    problem:
        The problem to be converted.

    Returns
    -------
    The new problem with the priors converted to measurements.

    Raises
    ------
    AssertionError
        If, for an estimated parameter, prior parameters are given without a
        prior type, a prior type is given without prior parameters, or the
        number of prior parameters is not two.
    NotImplementedError
        If a prior is of a uniform or otherwise unsupported type.
    ValueError
        If an observable ``prior_{parameter_id}`` already exists, or a
        parameter has an unknown parameter scale.
    """
    new_problem = copy.deepcopy(problem)

    # we only need to consider parameters that are estimated
    par_df_tmp = problem.parameter_df.loc[problem.parameter_df[ESTIMATE] == 1]

    if (
        OBJECTIVE_PRIOR_TYPE not in par_df_tmp
        or par_df_tmp.get(OBJECTIVE_PRIOR_TYPE).isna().all()
        or OBJECTIVE_PRIOR_PARAMETERS not in par_df_tmp
        or par_df_tmp.get(OBJECTIVE_PRIOR_PARAMETERS).isna().all()
    ):
        # nothing to do
        return new_problem

    def scaled_observable_formula(parameter_id, parameter_scale):
        # express the parameter on the requested scale as a formula string
        if parameter_scale == LIN:
            return parameter_id
        if parameter_scale == LOG:
            return f"ln({parameter_id})"
        if parameter_scale == LOG10:
            return f"log10({parameter_id})"
        raise ValueError(f"Unknown parameter scale {parameter_scale}.")

    # we could just use any condition and time point since the parameter
    # value is constant. however, using an existing timepoint and
    # (preequilibrationConditionId+)simulationConditionId will avoid
    # requiring extra simulations and solver stops in tools that do not
    # check for time dependency of the observable. we use the first
    # condition/timepoint from the measurement table.
    # these lookups do not depend on the parameter, so do them only once
    measurement_df = new_problem.measurement_df
    first_time = measurement_df[TIME].iloc[0]
    first_simulation_condition = measurement_df[SIMULATION_CONDITION_ID].iloc[
        0
    ]
    has_preequilibration = PREEQUILIBRATION_CONDITION_ID in measurement_df
    if has_preequilibration:
        first_preequilibration_condition = measurement_df[
            PREEQUILIBRATION_CONDITION_ID
        ].iloc[0]

    # the observable table is only extended after the loop, so the set of
    # pre-existing IDs and columns is fixed here
    existing_observable_ids = new_problem.observable_df.index
    has_transformation_column = (
        OBSERVABLE_TRANSFORMATION in new_problem.observable_df
    )

    new_measurement_dicts = []
    new_observable_dicts = []
    for _, row in par_df_tmp.iterrows():
        prior_type = row[OBJECTIVE_PRIOR_TYPE]
        parameter_scale = row.get(PARAMETER_SCALE, LIN)
        if pd.isna(prior_type):
            if not pd.isna(row[OBJECTIVE_PRIOR_PARAMETERS]):
                raise AssertionError(
                    "Objective prior parameters are set, but prior type is "
                    "not specified."
                )
            continue

        if "uniform" in prior_type.lower():
            # for measurements, "uniform" is not supported yet
            # if necessary, this could still be implemented by adding another
            # observable/measurement that will produce a constant objective
            # offset
            raise NotImplementedError("Uniform priors are not supported.")

        parameter_id = row.name
        raw_prior_parameters = row[OBJECTIVE_PRIOR_PARAMETERS]
        if pd.isna(raw_prior_parameters):
            # mirror of the check above; without it, the split below fails
            # with an unhelpful AttributeError on a float NaN
            raise AssertionError(
                "Objective prior type is set, but prior parameters are "
                f"not specified for parameter {parameter_id}."
            )
        # str() guards against cells that were parsed as plain numbers
        prior_parameters = tuple(
            map(
                float,
                str(raw_prior_parameters).split(PARAMETER_SEPARATOR),
            )
        )
        if len(prior_parameters) != 2:
            raise AssertionError(
                "Expected two objective prior parameters for parameter "
                f"{parameter_id}, but got {prior_parameters}."
            )

        # create new observable
        new_obs_id = f"prior_{parameter_id}"
        if new_obs_id in existing_observable_ids:
            raise ValueError(
                f"Observable ID {new_obs_id}, which is to be "
                "created, already exists."
            )
        new_observable = {
            OBSERVABLE_ID: new_obs_id,
            # only parameterScale* priors are defined on the parameter scale
            OBSERVABLE_FORMULA: scaled_observable_formula(
                parameter_id,
                parameter_scale if "parameterScale" in prior_type else LIN,
            ),
            NOISE_FORMULA: f"noiseParameter1_{new_obs_id}",
        }
        if prior_type in (LOG_NORMAL, LOG_LAPLACE):
            # NOTE(review): the prior location is used as measurement as-is;
            # confirm that this matches how the log-transformed noise model
            # interprets the measurement value.
            new_observable[OBSERVABLE_TRANSFORMATION] = LOG
        elif has_transformation_column:
            # only set default if the column is already present
            new_observable[OBSERVABLE_TRANSFORMATION] = LIN

        if prior_type in (NORMAL, PARAMETER_SCALE_NORMAL, LOG_NORMAL):
            new_observable[NOISE_DISTRIBUTION] = NORMAL
        elif prior_type in (LAPLACE, PARAMETER_SCALE_LAPLACE, LOG_LAPLACE):
            new_observable[NOISE_DISTRIBUTION] = LAPLACE
        else:
            raise NotImplementedError(
                f"Objective prior type {prior_type} is not implemented."
            )

        new_observable_dicts.append(new_observable)

        # add measurement: location -> measurement, scale -> noise parameter
        new_measurement = {
            OBSERVABLE_ID: new_obs_id,
            TIME: first_time,
            MEASUREMENT: prior_parameters[0],
            NOISE_PARAMETERS: prior_parameters[1],
            SIMULATION_CONDITION_ID: first_simulation_condition,
        }
        if has_preequilibration:
            new_measurement[
                PREEQUILIBRATION_CONDITION_ID
            ] = first_preequilibration_condition
        new_measurement_dicts.append(new_measurement)

        # remove prior from parameter table
        new_problem.parameter_df.loc[
            parameter_id, OBJECTIVE_PRIOR_TYPE
        ] = np.nan
        new_problem.parameter_df.loc[
            parameter_id, OBJECTIVE_PRIOR_PARAMETERS
        ] = np.nan

    # prepend the new rows to the existing tables
    new_problem.observable_df = pd.concat(
        [
            pd.DataFrame(new_observable_dicts).set_index(OBSERVABLE_ID),
            new_problem.observable_df,
        ]
    )
    new_problem.measurement_df = pd.concat(
        [
            pd.DataFrame(new_measurement_dicts),
            new_problem.measurement_df,
        ],
        ignore_index=True,
    )
    return new_problem

tests/v1/test_priors.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
from copy import deepcopy
2+
from pathlib import Path
3+
4+
import benchmark_models_petab
5+
import numpy as np
6+
import pandas as pd
7+
import pytest
8+
from scipy.stats import norm
9+
10+
import petab.v1
11+
from petab.v1 import get_simulation_conditions
12+
from petab.v1.priors import priors_to_measurements
13+
14+
15+
@pytest.mark.parametrize(
    "problem_id", ["Schwen_PONE2014", "Isensee_JCB2018", "Raimundez_PCB2020"]
)
def test_priors_to_measurements(problem_id):
    """Check that converting priors to measurements keeps the objective.

    For each benchmark problem, the priors are converted to measurements and
    the log-likelihood of the converted problem is compared against the
    log-likelihood of the original problem plus its prior contribution.
    """
    prior_problem: petab.v1.Problem = benchmark_models_petab.get_problem(
        problem_id
    )
    prior_problem.visualization_df = None
    assert petab.v1.lint_problem(prior_problem) is False

    if problem_id == "Isensee_JCB2018":
        # required to match the stored simulation results below
        petab.v1.flatten_timepoint_specific_output_overrides(prior_problem)
        assert petab.v1.lint_problem(prior_problem) is False
    pristine_problem = deepcopy(prior_problem)

    converted_problem = priors_to_measurements(prior_problem)

    # the input problem must be left untouched by the conversion
    for table_name in (
        "condition_df",
        "parameter_df",
        "observable_df",
        "measurement_df",
    ):
        table_after = getattr(prior_problem, table_name)
        table_before = getattr(pristine_problem, table_name)
        assert (diff := table_after.compare(table_before)).empty, diff

    # the converted problem must be valid, with the same parameters but
    # additional observables and measurements
    assert petab.v1.lint_problem(converted_problem) is False
    n_parameters_before = prior_problem.parameter_df.shape[0]
    n_parameters_after = converted_problem.parameter_df.shape[0]
    assert n_parameters_after == n_parameters_before

    n_observables_before = prior_problem.observable_df.shape[0]
    n_observables_after = converted_problem.observable_df.shape[0]
    assert n_observables_after > n_observables_before

    n_measurements_before = prior_problem.measurement_df.shape[0]
    n_measurements_after = converted_problem.measurement_df.shape[0]
    assert n_measurements_after > n_measurements_before

    # ensure we didn't introduce any new conditions
    conditions_after = get_simulation_conditions(
        converted_problem.measurement_df
    )
    conditions_before = get_simulation_conditions(prior_problem.measurement_df)
    assert len(conditions_after) == len(conditions_before)

    # verify that the objective function value is the same

    # load the stored simulation results of the original problem
    simulation_file = Path(
        benchmark_models_petab.MODELS_DIR,
        problem_id,
        f"simulatedData_{problem_id}.tsv",
    )
    simulation_df_priors = petab.v1.get_simulation_df(simulation_file)

    # for the prior observables, the measurement value is used as the
    # simulation value
    converted_measurement_df = converted_problem.measurement_df
    as_simulation_df = converted_measurement_df.rename(
        columns={petab.v1.MEASUREMENT: petab.v1.SIMULATION}
    )
    is_prior_row = converted_measurement_df[
        petab.v1.C.OBSERVABLE_ID
    ].str.startswith("prior_")
    simulation_df_measurements = pd.concat(
        [as_simulation_df[is_prior_row], simulation_df_priors]
    )

    llh_priors = petab.v1.calculate_llh_for_table(
        prior_problem.measurement_df,
        simulation_df_priors,
        prior_problem.observable_df,
        prior_problem.parameter_df,
    )
    llh_measurements = petab.v1.calculate_llh_for_table(
        converted_problem.measurement_df,
        simulation_df_measurements,
        converted_problem.observable_df,
        converted_problem.parameter_df,
    )

    # get prior objective function contribution
    parameter_df = prior_problem.parameter_df
    is_estimated = parameter_df[petab.v1.ESTIMATE] == 1
    has_prior = parameter_df[petab.v1.OBJECTIVE_PRIOR_TYPE].notna()
    parameter_ids = parameter_df.index.values[is_estimated & has_prior]
    priors = petab.v1.get_priors_from_df(
        prior_problem.parameter_df,
        mode="objective",
        parameter_ids=parameter_ids,
    )
    prior_contrib = 0
    for parameter_id, prior in zip(parameter_ids, priors, strict=True):
        prior_type, prior_pars, _scale, _bounds = prior
        if prior_type != petab.v1.PARAMETER_SCALE_NORMAL:
            # enable other models, once libpetab has proper support for
            # evaluating the prior contribution. until then, two test
            # problems should suffice
            assert problem_id == "Raimundez_PCB2020"
            pytest.skip(f"Prior type {prior_type} not implemented")

        free_index = prior_problem.x_free_ids.index(parameter_id)
        scaled_nominal_value = prior_problem.x_nominal_free_scaled[free_index]
        location, scale = prior_pars[0], prior_pars[1]
        prior_contrib += norm.logpdf(
            scaled_nominal_value, loc=location, scale=scale
        )

    assert np.isclose(
        llh_priors + prior_contrib, llh_measurements, rtol=1e-3, atol=1e-16
    ), (llh_priors + prior_contrib, llh_measurements)
    # check that the tolerance is not too high
    assert np.abs(prior_contrib) > 1e-3 * np.abs(llh_priors)

0 commit comments

Comments
 (0)