Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions pymob/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from pymob.sim.config import Config

@click.command()
@click.option("-f", "--file", type=str, default=None,
help="Path to the config file")
@click.option("-c", "--case_study", type=str, default="lotka_volterra_case_study",
help=help.case_study)
@click.option("-s", "--scenario", type=str, default="test_scenario",
Expand All @@ -22,9 +24,13 @@
help="The number of cores to be used for multiprocessing")
@click.option("--inference_backend", type=str, default="pymoo")
@click.option("--only-report", type=bool, is_flag=True, default=False)
def main(case_study, scenario, package, output, random_seed, n_cores, inference_backend, only_report):
def main(file, case_study, scenario, package, output, random_seed, n_cores, inference_backend, only_report):

cfg = os.path.join(package, case_study, "scenarios", scenario, "settings.cfg")
if file is None:
cfg = os.path.join(package, case_study, "scenarios", scenario, "settings.cfg")
else:
cfg = file

config = Config(cfg)
config.case_study.name = case_study
config.case_study.scenario = scenario
Expand All @@ -51,6 +57,7 @@ def main(case_study, scenario, package, output, random_seed, n_cores, inference_

sim.posterior_predictive_checks()
sim.report()
sim.export()

# TODO: Migrate to platform independent psutil
# max_ram_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000
Expand Down
21 changes: 17 additions & 4 deletions pymob/inference/numpyro_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
Tuple, Dict, Union, Optional, Callable, Literal, List, Any,
Protocol
)
import importlib

from tqdm import tqdm
import numpy as np
Expand Down Expand Up @@ -143,10 +144,22 @@ def __init__(
self.inference_model = self.parse_probabilistic_model()

if self.user_defined_error_model is not None:
user_error_model = getattr(
self.simulation._prob,
self.user_defined_error_model
)
if "." in self.user_defined_error_model:
# if "." in the name of the model assume that this is the
# fully quailified name of the function (including module)
module, func = self.user_defined_error_model.rsplit(".", 1)

# import the module
_module = importlib.import_module(module)
else:
# if not qualified name
# use plain name, assuming this is the function
func = self.user_defined_error_model

# use already imported _prob module from the case study
_module = self.simulation._prob

user_error_model = getattr(_module, func)

self.inference_model = partial(
self.inference_model,
Expand Down
6 changes: 5 additions & 1 deletion pymob/sim/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def dict_to_string(dct: Dict, replace_whitespace=""):


def list_to_string(lst: List):
return " ".join([str(l) for l in lst])
return " ".join([str(l).replace(" ", "") for l in lst])


def param_to_string(prm: Param):
Expand Down Expand Up @@ -361,6 +361,7 @@ class Casestudy(PymobModel):

output: Optional[str] = None
data: Optional[str] = None
scenario_path_override: Optional[str] = None

observations: Optional[str] = None

Expand Down Expand Up @@ -413,6 +414,9 @@ def default_settings_path(self) -> str:

@property
def scenario_path(self):
if self.scenario_path_override is not None:
return self.scenario_path_override

package_path = os.path.basename(os.path.abspath(self.package))
if package_path == self.name:
# if the package path is identical to the case study path, then
Expand Down
177 changes: 172 additions & 5 deletions pymob/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import copy
import warnings
import importlib
from copy import deepcopy
from typing import Optional, List, Union, Literal, Any, Tuple, Sequence, Mapping
from types import ModuleType
import configparser
Expand All @@ -13,13 +14,14 @@
import numpy as np
from numpy.typing import NDArray
import xarray as xr
import arviz as az
import dpath as dp
from sklearn.preprocessing import MinMaxScaler

import pymob
from pymob.utils.config import lambdify_expression, lookup_args, get_return_arguments
from pymob.utils.errors import errormsg, import_optional_dependency
from pymob.utils.store_file import parse_config_section
from pymob.utils.errors import errormsg, import_optional_dependency, PymobError
from pymob.utils.store_file import parse_config_section, is_number
from pymob.utils.misc import benchmark
from pymob.sim.evaluator import Evaluator, create_dataset_from_dict, create_dataset_from_numpy
from pymob.sim.base import stack_variables
Expand Down Expand Up @@ -275,6 +277,9 @@ def model_parameters(self, value: Dict[Literal["parameters","y0","x_in"],Any]):
raise KeyError(
"'model_parameters' must contain a 'parameters' key"
)

self._check_input_for_nans(model_parameters=value, key="x_in")
self._check_input_for_nans(model_parameters=value, key="y0")

if not isinstance(value["parameters"], dict):
raise ValueError(
Expand All @@ -290,6 +295,52 @@ def model_parameters(self, value: Dict[Literal["parameters","y0","x_in"],Any]):
def _on_params_updated(self, updated_dict):
self.model_parameters = updated_dict

def _check_input_for_nans(self, model_parameters, key):
if key not in model_parameters:
return

for data_var, array in model_parameters[key].items():
nans = array.isnull().sum([
d for d in array.dims
if d == self.config.simulation.x_dimension
])
nans = nans.where(nans, drop=True)

if sum(nans.shape) > 0:
batch_dim = self.config.simulation.batch_dimension
raise PymobError(
f"The xarray passed to `sim.model_parameters['{key}']` contained "+
f"NaN values. They occur at the {batch_dim}-coordinates: "+
f"{nans.coords['id'].values}.\n\n"+

"Why does this error occur?\n"+
"--------------------------\n"+
"Pymob uses y0 as initial conditions for a solver and uses x_in to "+
"provide interpolated values for any value of t. Having nan values "+
"in such components presents an unsolvable challenge to the solver.\n\n" +

"How can I fix this error?\n"+
"-------------------------\n"+
"General advice: Use sim.parse_input https://pymob.readthedocs.io/en/stable/api/pymob.html#pymob.simulation.SimulationBase.parse_input\n"
"* Problem with 'x_in': Check sim.observations and also check"+
"sim.config.simulations.x_in"
"You may need to take a decision how to interpolate "+
"your data. Check out the xarray documentation "+
"https://docs.xarray.dev/en/stable/generated/xarray.DataArray.interpolate_na.html" +
"or https://docs.xarray.dev/en/stable/generated/xarray.DataArray.ffill.html "+
"to replace nan values.\n"+
"* If you want to make sure your 'x_in' follows a rectangular interpolation you can use\n"+
" >>> sim.parse_input('x_in', reference_data=sim.observations)\n"
" >>> pymob.solvers.base.rect_interpolation(x_in)\n"
"* Problem with 'y0': Check sim.observations and also check "+
"sim.config.simulation.y0\n\n"

"Details\n"
"-------\n"
f"{key}: {model_parameters[key]}"
)


@property
def observations(self):
assert isinstance(self._observations, xr.Dataset), "Observations must be an xr.Dataset"
Expand Down Expand Up @@ -348,17 +399,30 @@ def observations(self, value: xr.Dataset):

self.create_data_scaler()

def save_observations(self, filename="observations.nc", force=False):
fp = os.path.join(self.data_path, filename)
def save_observations(self, filename="observations.nc", directory=None, force=False):
if directory is None:
directory = self.data_path

fp = os.path.join(directory, filename)
if filename != self.config.case_study.observations:
self.config.case_study.observations = filename

self._serialize_attrs(self.observations)

if not os.path.exists(fp) or force:
self.observations.to_netcdf(fp)
else:
if input(f"Observations {fp} exist. Overwrite? [y/N]") == "y":
self.observations.to_netcdf(fp)

@staticmethod
def _serialize_attrs(observations):
for key, dv in observations.items():
dv.attrs = {
k: str(v) for k, v in dv.attrs.items()
}

return observations

@property
def coordinates(self):
Expand Down Expand Up @@ -1865,4 +1929,107 @@ def report(self):
# TODO: find a good way to integrate posterior predictive and prior predictive
# into the report. I think their execution should be continued outside of the report,
# but they could be linked (as images) inside the report. This way, the report
# would just have to plot them if available.
# would just have to plot them if available.

def export(self, directory: Optional[str] = None):
"""Exports a SimulationBase object to disk.

Parameters
----------
directory : str
Optional. Specifies the directory where the simulation should be exported to.

This method exports at least two files:
- 'settings.cfg'
- 'observations.nc'.

If the inferer was already run, it additionally exports
- 'idata.nc'

All files are stored to the directory specified in `sim.output_path`
"""
self.config.case_study.data_path = self.output_path
# FIXME: Setting package to . did not work out with use of pymob infer
# Why did I do this? Make sure it checks out
# self.config.case_study.package = "."
self.save_observations(force=True)

if hasattr(self, "inferer"):
backend = type(self.inferer).__name__

if backend == "NumpyroBackend":
self.config.simulation.inferer = "numpyro"
elif backend == "ScipyBackend":
self.config.simulation.inferer = "scipy"
elif backend == "PymooBackend":
self.config.simulation.inferer = "pymoo"
elif backend == "PyabcBackend":
self.config.simulation.inferer = "pyabc"
else:
raise NotImplementedError(f"Backend: {backend} is not implemented.")

if hasattr(self.inferer, "idata"):
self.inferer.idata.to_netcdf(os.path.join(self.output_path, "idata.nc"))
else:
pass
else:
pass

self.config.save(fp=os.path.join(self.output_path,"settings.cfg"), force=True)

@classmethod
def from_directory(cls, directory: str) -> "SimulationBase":
"""Imports a SimulationBase from a directory where the simulation had been
exported to with sim.export()

Parameters
----------
directory : str
The path to the directory, the required contents of the directory are:
'settings.cfg' and 'observations.nc'. Optionally 'idata.nc' can be defined,
which contains the posterior. From this a MempySim with completed inference
can be initialized
"""
cfg_file = os.path.join(directory, "settings.cfg")
sim = cls(config=cfg_file)
sim.setup()

if hasattr(sim.config.simulation, "inferer"):
sim.set_inferer(sim.config.simulation.inferer)
idata = os.path.join(sim.output_path, "idata.nc")
if os.path.exists(idata):
sim.inferer.idata = az.from_netcdf(idata)
else:
pass
else:
pass

return sim

def copy(self):
"""Creates a copy of a SimulationBase object by deepcopying all loose references
TODO: Replace by export(..)->from_directory(...) to a temporary directoy.
Note that the output and data paths should be stored before export and
recovered after import
"""
with warnings.catch_warnings(action="ignore"):
sim_copy = type(self)(self.config.copy(deep=True))
sim_copy.observations = self.observations.copy(deep=True)

# must copy individual parts of the dict due to the on_update method
model_parameters = {k: deepcopy(v) for k, v in self.model_parameters.items()}

# TODO: Refactor this once the parameterize method is removed.
sim_copy.parameterize = partial(sim_copy.parameterize, model_parameters=model_parameters)
sim_copy._model_parameters = ParameterDict(model_parameters, callback=sim_copy._on_params_updated)

sim_copy.load_modules()
if hasattr(self, "inferer"):
sim_copy.inferer = type(self.inferer)(sim_copy)
# idata uses deepcopy by default
sim_copy.inferer.idata = self.inferer.idata.copy()
sim_copy.model = self.model
sim_copy.solver_post_processing = self.solver_post_processing

return sim_copy

2 changes: 2 additions & 0 deletions pymob/utils/store_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ def opt(a, b, c):
return a if a is not None else b if b is not None else c

def is_number(s):
if s is None:
return False
try:
float(s)
return True
Expand Down
Loading