|
| 1 | +"""Probability distributions used by PEtab.""" |
| 2 | +from __future__ import annotations |
| 3 | + |
| 4 | +import abc |
| 5 | +from typing import Literal |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import pandas as pd |
| 9 | +from scipy.stats import laplace, lognorm, norm, uniform |
| 10 | + |
| 11 | +from . import C |
| 12 | + |
| 13 | + |
| 14 | +class Distribution(abc.ABC): |
| 15 | + """A univariate probability distribution. |
| 16 | +
|
| 17 | + :param type_: The type of the distribution. |
| 18 | + :param transformation: The transformation to be applied to the sample. |
| 19 | + Ignored if `parameter_scale` is `True`. |
| 20 | + :param parameters: The parameters of the distribution. |
| 21 | + :param bounds: The bounds of the distribution (lower, upper). |
| 22 | + :param parameter_scale: Whether the parameters are already on the correct |
| 23 | + scale. If `False`, the parameters are transformed to the correct scale. |
| 24 | + If `True`, the parameters are assumed to be on the correct scale and |
| 25 | + no transformation is applied. |
| 26 | + :param transformation: The transformation of the distribution. |
| 27 | +
|
| 28 | + """ |
| 29 | + |
| 30 | + _type_to_cls: dict[str, type[Distribution]] = {} |
| 31 | + |
| 32 | + def __init__( |
| 33 | + self, |
| 34 | + type_: str, |
| 35 | + transformation: str, |
| 36 | + parameters: tuple, |
| 37 | + bounds: tuple = None, |
| 38 | + parameter_scale: bool = False, |
| 39 | + ): |
| 40 | + self.type = type_ |
| 41 | + self.parameters = parameters |
| 42 | + self.bounds = bounds |
| 43 | + self.transformation = transformation |
| 44 | + self.parameter_scale = parameter_scale |
| 45 | + |
| 46 | + def __repr__(self): |
| 47 | + return f"{self.__class__.__name__}({self.__dict__})" |
| 48 | + |
| 49 | + @abc.abstractmethod |
| 50 | + def sample(self, shape=None) -> np.ndarray: |
| 51 | + """Sample from the distribution. |
| 52 | +
|
| 53 | + :param shape: The shape of the sample. |
| 54 | + :return: A sample from the distribution. |
| 55 | + """ |
| 56 | + ... |
| 57 | + |
| 58 | + def _scale_sample(self, sample): |
| 59 | + """Scale the sample to the parameter space""" |
| 60 | + if self.parameter_scale: |
| 61 | + return sample |
| 62 | + if self.transformation == C.LIN: |
| 63 | + return sample |
| 64 | + if self.transformation == C.LOG: |
| 65 | + return np.log(sample) |
| 66 | + if self.transformation == C.LOG10: |
| 67 | + return np.log10(sample) |
| 68 | + raise NotImplementedError( |
| 69 | + f"Transformation {self.transformation} not implemented." |
| 70 | + ) |
| 71 | + |
| 72 | + def _clip_to_bounds(self, x): |
| 73 | + """Clip values in array x to bounds.""" |
| 74 | + # TODO: replace this by proper truncation |
| 75 | + if self.bounds is None: |
| 76 | + return x |
| 77 | + |
| 78 | + return np.maximum( |
| 79 | + np.minimum(self._scale_sample(self.bounds[1]), x), |
| 80 | + self._scale_sample(self.bounds[0]), |
| 81 | + ) |
| 82 | + |
| 83 | + @abc.abstractmethod |
| 84 | + def pdf(self, x): |
| 85 | + """Probability density function at x. |
| 86 | +
|
| 87 | + ``x`` is assumed to be on the parameter scale. |
| 88 | + """ |
| 89 | + ... |
| 90 | + |
| 91 | + @staticmethod |
| 92 | + def from_par_dict( |
| 93 | + d, type_=Literal["initialization", "objective"] |
| 94 | + ) -> Distribution: |
| 95 | + """Create a distribution from a row of the parameter table. |
| 96 | +
|
| 97 | + :param d: A dictionary representing a row of the parameter table. |
| 98 | + :return: A distribution object. |
| 99 | + """ |
| 100 | + dist_type = d.get(f"{type_}PriorType", C.NORMAL) |
| 101 | + if not isinstance(dist_type, str) and np.isnan(dist_type): |
| 102 | + dist_type = C.PARAMETER_SCALE_UNIFORM |
| 103 | + |
| 104 | + cls = Distribution._type_to_cls[dist_type] |
| 105 | + if ( |
| 106 | + pd.isna(d[f"{type_}PriorParameters"]) |
| 107 | + and dist_type == C.PARAMETER_SCALE_UNIFORM |
| 108 | + ): |
| 109 | + params = d[C.LOWER_BOUND], d[C.UPPER_BOUND] |
| 110 | + else: |
| 111 | + params = tuple( |
| 112 | + map( |
| 113 | + float, |
| 114 | + d[f"{type_}PriorParameters"].split(C.PARAMETER_SEPARATOR), |
| 115 | + ) |
| 116 | + ) |
| 117 | + return cls( |
| 118 | + *params, |
| 119 | + bounds=(d[C.LOWER_BOUND], d[C.UPPER_BOUND]), |
| 120 | + transformation=d.get(C.PARAMETER_SCALE, C.LIN), |
| 121 | + ) |
| 122 | + |
| 123 | + |
| 124 | +class Normal(Distribution): |
| 125 | + """A normal distribution.""" |
| 126 | + |
| 127 | + def __init__( |
| 128 | + self, |
| 129 | + mean: float, |
| 130 | + std: float, |
| 131 | + bounds: tuple = None, |
| 132 | + transformation: str = C.LIN, |
| 133 | + ): |
| 134 | + super().__init__( |
| 135 | + C.NORMAL, |
| 136 | + transformation=transformation, |
| 137 | + parameters=(mean, std), |
| 138 | + bounds=bounds, |
| 139 | + parameter_scale=False, |
| 140 | + ) |
| 141 | + |
| 142 | + def sample(self, shape=None): |
| 143 | + sample = np.random.normal( |
| 144 | + loc=self.parameters[0], scale=self.parameters[1], size=shape |
| 145 | + ) |
| 146 | + return self._clip_to_bounds(self._scale_sample(sample)) |
| 147 | + |
| 148 | + def pdf(self, x): |
| 149 | + return norm.pdf(x, loc=self.parameters[0], scale=self.parameters[1]) |
| 150 | + |
| 151 | + |
| 152 | +class LogNormal(Distribution): |
| 153 | + """A log-normal distribution.""" |
| 154 | + |
| 155 | + def __init__( |
| 156 | + self, |
| 157 | + mean: float, |
| 158 | + std: float, |
| 159 | + bounds: tuple = None, |
| 160 | + transformation: str = C.LIN, |
| 161 | + ): |
| 162 | + super().__init__( |
| 163 | + C.LOG_NORMAL, |
| 164 | + transformation=transformation, |
| 165 | + parameters=(mean, std), |
| 166 | + bounds=bounds, |
| 167 | + parameter_scale=False, |
| 168 | + ) |
| 169 | + |
| 170 | + def sample(self, shape=None): |
| 171 | + sample = np.random.lognormal( |
| 172 | + mean=self.parameters[0], sigma=self.parameters[1], size=shape |
| 173 | + ) |
| 174 | + return self._clip_to_bounds(self._scale_sample(sample)) |
| 175 | + |
| 176 | + def pdf(self, x): |
| 177 | + return lognorm.pdf( |
| 178 | + x, loc=self.parameters[0], scale=np.exp(self.parameters[1]) |
| 179 | + ) |
| 180 | + |
| 181 | + |
| 182 | +class Uniform(Distribution): |
| 183 | + """A uniform distribution.""" |
| 184 | + |
| 185 | + def __init__( |
| 186 | + self, |
| 187 | + lower_bound: float, |
| 188 | + upper_bound: float, |
| 189 | + bounds: tuple = None, |
| 190 | + transformation: str = C.LIN, |
| 191 | + ): |
| 192 | + super().__init__( |
| 193 | + C.UNIFORM, |
| 194 | + transformation=transformation, |
| 195 | + parameters=(lower_bound, upper_bound), |
| 196 | + bounds=bounds, |
| 197 | + parameter_scale=False, |
| 198 | + ) |
| 199 | + |
| 200 | + def sample(self, shape=None): |
| 201 | + sample = np.random.uniform( |
| 202 | + low=self.parameters[0], high=self.parameters[1], size=shape |
| 203 | + ) |
| 204 | + return self._clip_to_bounds(self._scale_sample(sample)) |
| 205 | + |
| 206 | + def pdf(self, x): |
| 207 | + return uniform.pdf( |
| 208 | + x, |
| 209 | + loc=self.parameters[0], |
| 210 | + scale=self.parameters[1] - self.parameters[0], |
| 211 | + ) |
| 212 | + |
| 213 | + |
| 214 | +class Laplace(Distribution): |
| 215 | + """A Laplace distribution.""" |
| 216 | + |
| 217 | + def __init__( |
| 218 | + self, |
| 219 | + mean: float, |
| 220 | + scale: float, |
| 221 | + bounds: tuple = None, |
| 222 | + transformation: str = C.LIN, |
| 223 | + ): |
| 224 | + super().__init__( |
| 225 | + C.LAPLACE, |
| 226 | + transformation=transformation, |
| 227 | + parameters=(mean, scale), |
| 228 | + bounds=bounds, |
| 229 | + parameter_scale=False, |
| 230 | + ) |
| 231 | + |
| 232 | + def sample(self, shape=None): |
| 233 | + sample = np.random.laplace( |
| 234 | + loc=self.parameters[0], scale=self.parameters[1], size=shape |
| 235 | + ) |
| 236 | + return self._clip_to_bounds(self._scale_sample(sample)) |
| 237 | + |
| 238 | + def pdf(self, x): |
| 239 | + return laplace.pdf(x, loc=self.parameters[0], scale=self.parameters[1]) |
| 240 | + |
| 241 | + |
| 242 | +class LogLaplace(Distribution): |
| 243 | + """A log-Laplace distribution.""" |
| 244 | + |
| 245 | + def __init__( |
| 246 | + self, |
| 247 | + mean: float, |
| 248 | + scale: float, |
| 249 | + bounds: tuple = None, |
| 250 | + transformation: str = C.LIN, |
| 251 | + ): |
| 252 | + super().__init__( |
| 253 | + C.LOG_LAPLACE, |
| 254 | + transformation=transformation, |
| 255 | + parameters=(mean, scale), |
| 256 | + bounds=bounds, |
| 257 | + parameter_scale=False, |
| 258 | + ) |
| 259 | + |
| 260 | + @property |
| 261 | + def mean(self): |
| 262 | + """The mean of the underlying Laplace distribution.""" |
| 263 | + return self.parameters[0] |
| 264 | + |
| 265 | + @property |
| 266 | + def scale(self): |
| 267 | + """The scale of the underlying Laplace distribution.""" |
| 268 | + return self.parameters[1] |
| 269 | + |
| 270 | + def sample(self, shape=None): |
| 271 | + sample = np.exp( |
| 272 | + np.random.laplace(loc=self.mean, scale=self.scale, size=shape) |
| 273 | + ) |
| 274 | + return self._clip_to_bounds(self._scale_sample(sample)) |
| 275 | + |
| 276 | + def pdf(self, x): |
| 277 | + return ( |
| 278 | + 1 |
| 279 | + / (2 * self.scale * x) |
| 280 | + * np.exp(-np.abs(np.log(x) - self.mean) / self.scale) |
| 281 | + ) |
| 282 | + |
| 283 | + |
| 284 | +class ParameterScaleNormal(Distribution): |
| 285 | + """A normal distribution with parameters on the parameter scale.""" |
| 286 | + |
| 287 | + def __init__( |
| 288 | + self, |
| 289 | + mean: float, |
| 290 | + std: float, |
| 291 | + bounds: tuple = None, |
| 292 | + transformation: str = C.LIN, |
| 293 | + ): |
| 294 | + super().__init__( |
| 295 | + C.PARAMETER_SCALE_NORMAL, |
| 296 | + transformation=transformation, |
| 297 | + parameters=(mean, std), |
| 298 | + bounds=bounds, |
| 299 | + parameter_scale=True, |
| 300 | + ) |
| 301 | + |
| 302 | + sample = Normal.sample |
| 303 | + pdf = Normal.pdf |
| 304 | + |
| 305 | + |
| 306 | +class ParameterScaleUniform(Distribution): |
| 307 | + """A uniform distribution with parameters on the parameter scale.""" |
| 308 | + |
| 309 | + def __init__( |
| 310 | + self, |
| 311 | + lower_bound: float, |
| 312 | + upper_bound: float, |
| 313 | + bounds: tuple = None, |
| 314 | + transformation: str = C.LIN, |
| 315 | + ): |
| 316 | + super().__init__( |
| 317 | + C.PARAMETER_SCALE_UNIFORM, |
| 318 | + transformation=transformation, |
| 319 | + parameters=(lower_bound, upper_bound), |
| 320 | + bounds=bounds, |
| 321 | + parameter_scale=True, |
| 322 | + ) |
| 323 | + |
| 324 | + sample = Uniform.sample |
| 325 | + pdf = Uniform.pdf |
| 326 | + |
| 327 | + |
| 328 | +class ParameterScaleLaplace(Distribution): |
| 329 | + """A Laplace distribution with parameters on the parameter scale.""" |
| 330 | + |
| 331 | + def __init__( |
| 332 | + self, |
| 333 | + mean: float, |
| 334 | + scale: float, |
| 335 | + bounds: tuple = None, |
| 336 | + transformation: str = C.LIN, |
| 337 | + ): |
| 338 | + super().__init__( |
| 339 | + C.PARAMETER_SCALE_LAPLACE, |
| 340 | + transformation=transformation, |
| 341 | + parameters=(mean, scale), |
| 342 | + bounds=bounds, |
| 343 | + parameter_scale=True, |
| 344 | + ) |
| 345 | + |
| 346 | + sample = Laplace.sample |
| 347 | + pdf = Laplace.pdf |
| 348 | + |
| 349 | + |
| 350 | +Distribution._type_to_cls = { |
| 351 | + C.NORMAL: Normal, |
| 352 | + C.LOG_NORMAL: LogNormal, |
| 353 | + C.UNIFORM: Uniform, |
| 354 | + C.LAPLACE: Laplace, |
| 355 | + C.LOG_LAPLACE: LogLaplace, |
| 356 | + C.PARAMETER_SCALE_NORMAL: ParameterScaleNormal, |
| 357 | + C.PARAMETER_SCALE_UNIFORM: ParameterScaleUniform, |
| 358 | + C.PARAMETER_SCALE_LAPLACE: ParameterScaleLaplace, |
| 359 | +} |
0 commit comments