Skip to content

Commit c3d51d6

Browse files
authored
Merge pull request #496 from PEtab-dev/prepare_new_release
Release 0.1.14
2 parents d948970 + 064680e commit c3d51d6

File tree

6 files changed

+129
-27
lines changed

6 files changed

+129
-27
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22

33
## 0.1 series
44

5+
### 0.1.14
6+
7+
* Fix sampling of priors in `parameterScale` (#492)
8+
* Clarify documentation of `parameterScale` priors
9+
* Improvements in `petab.simulate` (#479):
10+
* Fix default noise distributions
11+
* Add option for non-negative synthetic data
12+
513
### 0.1.13
614

715
* Fix for pandas 1.2.0 -- use `get_handle` instead of `get_filepath_or_buffer`

doc/documentation_data_format.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,9 @@ Detailed field description
537537

538538
Prior parameters used for sampling of initial points for optimization,
539539
separated by a semicolon. Defaults to ``lowerBound;upperBound``.
540+
The parameters are expected to be in linear scale except for the
541+
``parameterScale`` priors, where the prior parameters are expected to be
542+
in parameter scale.
540543

541544
So far, only numeric values are supported, not parameter names.
542545
Parameters for the different prior types are:

petab/parameters.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,8 @@ def get_priors_from_df(parameter_df: pd.DataFrame,
343343
# if no prior is specified, we assume a non-informative (uniform) one
344344
if prior_type == 'nan':
345345
prior_type = PARAMETER_SCALE_UNIFORM
346-
prior_pars = (row[LOWER_BOUND], row[UPPER_BOUND])
346+
prior_pars = (scale(row[LOWER_BOUND], par_scale),
347+
scale(row[UPPER_BOUND], par_scale))
347348

348349
prior_list.append((prior_type, prior_pars, par_scale, par_bounds))
349350

petab/simulate.py

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
"""PEtab simulator base class and related functions."""
2+
13
import abc
24
import numpy as np
35
import pathlib
@@ -10,8 +12,8 @@
1012

1113

1214
class Simulator(abc.ABC):
13-
"""
14-
Base class that specific simulators should inherit.
15+
"""Base class that specific simulators should inherit.
16+
1517
Specific simulators should minimally implement the
1618
`simulate_without_noise` method.
1719
Example (AMICI): https://bit.ly/33SUSG4
@@ -32,10 +34,14 @@ class Simulator(abc.ABC):
3234
directory and its contents may be modified and deleted, and
3335
should be considered ephemeral.
3436
"""
35-
def __init__(self,
36-
petab_problem: petab.Problem,
37-
working_dir: Optional[Union[pathlib.Path, str]] = None):
38-
"""
37+
38+
def __init__(
39+
self,
40+
petab_problem: petab.Problem,
41+
working_dir: Optional[Union[pathlib.Path, str]] = None,
42+
):
43+
"""Initialize the simulator.
44+
3945
Initialize the simulator with sufficient information to perform a
4046
simulation. If no working directory is specified, a temporary one is
4147
created.
@@ -64,14 +70,16 @@ def __init__(self,
6470
self.rng = np.random.default_rng()
6571

6672
def remove_working_dir(self, force: bool = False, **kwargs) -> None:
67-
"""
68-
Remove the simulator working directory and all files within (see the
69-
`__init__` method arguments).
73+
"""Remove the simulator working directory, and all files within.
74+
75+
See the `__init__` method arguments.
7076
7177
Arguments:
7278
force:
7379
If True, the working directory is removed regardless of
7480
whether it is a temporary directory.
81+
**kwargs:
82+
Additional keyword arguments are passed to `shutil.rmtree`.
7583
"""
7684
if force or self.temporary_working_dir:
7785
shutil.rmtree(self.working_dir, **kwargs)
@@ -85,17 +93,18 @@ def remove_working_dir(self, force: bool = False, **kwargs) -> None:
8593

8694
@abc.abstractmethod
8795
def simulate_without_noise(self) -> pd.DataFrame:
88-
"""
89-
Simulate a PEtab problem. This is an abstract method that should be
90-
implemented in a simulation package. Links to examples of this are in
91-
the class docstring.
96+
"""Simulate the PEtab problem.
97+
98+
This is an abstract method that should be implemented with a simulation
99+
package. Examples of this are referenced in the class docstring.
92100
93101
Returns:
94102
Simulated data, as a PEtab measurements table, which should be
95103
equivalent to replacing all values in the `petab.C.MEASUREMENT`
96104
column of the measurements table (of the PEtab problem supplied to
97105
the `__init__` method), with simulated values.
98106
"""
107+
raise NotImplementedError
99108

100109
def simulate(
101110
self,
@@ -109,6 +118,9 @@ def simulate(
109118
noise: If True, noise is added to simulated data.
110119
noise_scaling_factor:
111120
A multiplier of the scale of the noise distribution.
121+
**kwargs:
122+
Additional keyword arguments are passed to
123+
`simulate_without_noise`.
112124
113125
Returns:
114126
Simulated data, as a PEtab measurements table.
@@ -122,6 +134,7 @@ def add_noise(
122134
self,
123135
simulation_df: pd.DataFrame,
124136
noise_scaling_factor: float = 1,
137+
**kwargs
125138
) -> pd.DataFrame:
126139
"""Add noise to simulated data.
127140
@@ -130,6 +143,8 @@ def add_noise(
130143
A PEtab measurements table that contains simulated data.
131144
noise_scaling_factor:
132145
A multiplier of the scale of the noise distribution.
146+
**kwargs:
147+
Additional keyword arguments are passed to `sample_noise`.
133148
134149
Returns:
135150
Simulated data with noise, as a PEtab measurements table.
@@ -143,6 +158,7 @@ def add_noise(
143158
self.noise_formulas,
144159
self.rng,
145160
noise_scaling_factor,
161+
**kwargs,
146162
)
147163
for _, row in simulation_df_with_noise.iterrows()
148164
]
@@ -156,6 +172,7 @@ def sample_noise(
156172
noise_formulas: Optional[Dict[str, sp.Expr]] = None,
157173
rng: Optional[np.random.Generator] = None,
158174
noise_scaling_factor: float = 1,
175+
zero_bounded: bool = False,
159176
) -> float:
160177
"""Generate a sample from a PEtab noise distribution.
161178
@@ -176,6 +193,10 @@ def sample_noise(
176193
A NumPy random generator.
177194
noise_scaling_factor:
178195
A multiplier of the scale of the noise distribution.
196+
zero_bounded:
197+
Return zero if the sign of the sampled noisy value and `simulated_value`
198+
differ. Can be used to ensure non-negative and non-positive values,
199+
if the sign of `simulated_value` should not change.
179200
180201
Returns:
181202
The sample from the PEtab noise distribution.
@@ -200,9 +221,20 @@ def sample_noise(
200221
.loc[measurement_row[petab.C.OBSERVABLE_ID]]
201222
.get(petab.C.NOISE_DISTRIBUTION, petab.C.NORMAL)
202223
)
224+
# an empty noise distribution column in an observables table can result in
225+
# `noise_distribution` being NaN (`float('nan')`)
226+
if pd.isna(noise_distribution):
227+
noise_distribution = petab.C.NORMAL
203228

204229
# below is e.g.: `np.random.normal(loc=simulation, scale=noise_value)`
205-
return getattr(rng, noise_distribution)(
230+
simulated_value_with_noise = getattr(rng, noise_distribution)(
206231
loc=simulated_value,
207232
scale=noise_value * noise_scaling_factor
208233
)
234+
235+
if (
236+
zero_bounded and
237+
np.sign(simulated_value) != np.sign(simulated_value_with_noise)
238+
):
239+
return 0.0
240+
return simulated_value_with_noise

petab/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
"""PEtab library version"""
2-
__version__ = '0.1.13'
2+
__version__ = '0.1.14'

tests/test_simulate.py

Lines changed: 69 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -65,15 +65,67 @@ def test_remove_working_dir(petab_problem):
6565
assert not pathlib.Path(simulator.working_dir).is_dir()
6666

6767

68+
def test_zero_bounded(petab_problem):
69+
"""Test `zero_bounded` argument of `sample_noise`."""
70+
positive = np.spacing(1)
71+
negative = -positive
72+
73+
simulator = TestSimulator(petab_problem)
74+
# Set the random seed to ensure predictable tests.
75+
simulator.rng = np.random.default_rng(seed=0)
76+
77+
# Set approximately half of the measurements to negative values, and the
78+
# rest to positive values.
79+
n_measurements = len(petab_problem.measurement_df)
80+
neg_indices = range(round(n_measurements / 2))
81+
pos_indices = range(len(neg_indices), n_measurements)
82+
measurements = [
83+
negative if index in neg_indices else
84+
(positive if index in pos_indices else np.nan)
85+
for index in range(n_measurements)
86+
]
87+
synthetic_data_df = simulator.simulate().assign(**{
88+
petab.C.MEASUREMENT: measurements
89+
})
90+
# All measurements are non-zero
91+
assert (synthetic_data_df['measurement'] != 0).all()
92+
# No measurements are NaN
93+
assert not (np.isnan(synthetic_data_df['measurement'])).any()
94+
95+
synthetic_data_df_with_noise = simulator.add_noise(
96+
synthetic_data_df,
97+
)
98+
# Both negative and positive values are returned by default.
99+
assert all([
100+
(synthetic_data_df_with_noise['measurement'] <= 0).any(),
101+
(synthetic_data_df_with_noise['measurement'] >= 0).any(),
102+
])
103+
104+
synthetic_data_df_with_noise = simulator.add_noise(
105+
synthetic_data_df,
106+
zero_bounded=True,
107+
)
108+
# Values with noise that are different in sign to values without noise are
109+
# zeroed.
110+
assert all([
111+
(synthetic_data_df_with_noise['measurement'][neg_indices] <= 0).all(),
112+
(synthetic_data_df_with_noise['measurement'][pos_indices] >= 0).all(),
113+
(synthetic_data_df_with_noise['measurement'][neg_indices] == 0).any(),
114+
(synthetic_data_df_with_noise['measurement'][pos_indices] == 0).any(),
115+
(synthetic_data_df_with_noise['measurement'][neg_indices] < 0).any(),
116+
(synthetic_data_df_with_noise['measurement'][pos_indices] > 0).any(),
117+
])
118+
119+
68120
def test_add_noise(petab_problem):
69121
"""Test the noise generating method."""
70122

71123
tested_noise_distributions = {'normal', 'laplace'}
72124
assert set(petab.C.NOISE_MODELS) == tested_noise_distributions, (
73125
'The noise generation methods have only been tested for '
74-
f'{tested_noise_distributions}. Please edit this test '
75-
'to include this distribution in its tested distributions. The '
76-
'appropriate SciPy distribution will need to be added to '
126+
f'{tested_noise_distributions}. Please edit this test to include this '
127+
'distribution in its tested distributions. The appropriate SciPy '
128+
'distribution will need to be added to '
77129
'`petab_numpy2scipy_distribution` in `_test_add_noise`.'
78130
)
79131

@@ -94,6 +146,8 @@ def _test_add_noise(petab_problem) -> None:
94146
}
95147

96148
simulator = TestSimulator(petab_problem)
149+
# Set the random seed to ensure predictable tests.
150+
simulator.rng = np.random.default_rng(seed=0)
97151
synthetic_data_df = simulator.simulate()
98152

99153
# Generate samples of noisy data
@@ -144,26 +198,30 @@ def row2cdf(row, index) -> Callable:
144198
getattr(
145199
scipy.stats,
146200
petab_numpy2scipy_distribution[
147-
expected_noise_distributions[index]]
148-
).cdf, loc=row[MEASUREMENT], scale=expected_noise_values[index])
201+
expected_noise_distributions[index]
202+
]
203+
).cdf,
204+
loc=row[MEASUREMENT],
205+
scale=expected_noise_values[index]
206+
)
149207

150208
# Test whether the distribution of the samples is equal to the expected
151209
# distribution, for each measurement.
152210
results = []
153211
for index, row in synthetic_data_df.iterrows():
154-
r = scipy.stats.ks_1samp(
212+
results.append(scipy.stats.ks_1samp(
155213
samples[:, index],
156214
row2cdf(row, index)
157-
)
158-
results.append(r)
215+
))
159216
observed_fraction_above_threshold = (
160-
sum(r.pvalue > ks_1samp_pvalue_threshold for r in results) /
161-
len(results)
217+
sum(r.pvalue > ks_1samp_pvalue_threshold for r in results)
218+
/ len(results)
162219
)
163220
# A sufficient fraction of the measurement sample distributions is similar enough
164221
# to the expected distribution
165222
assert (
166-
observed_fraction_above_threshold > minimum_fraction_above_threshold)
223+
observed_fraction_above_threshold > minimum_fraction_above_threshold
224+
)
167225

168226
simulator.remove_working_dir()
169227
assert not pathlib.Path(simulator.working_dir).is_dir()

0 commit comments

Comments
 (0)