from functools import partial
from typing import NamedTuple

import jax
import jax.numpy as jnp
import jax.tree_util as jtu
from jax import Array
from jax.typing import ArrayLike

from .gsd import vmax, vmin, log_prob


class GSDParams(NamedTuple):
    """NamedTuple representing the parameters of the Generalized Score Distribution (GSD).

    This class stores the psi (expected score) and rho (dispersion) parameters
    of the GSD as a single pytree, a convenient grouping for the estimation
    routines below and other statistical and modeling applications.
    """
    psi: Array
    rho: Array

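
# Because GSDParams is a NamedTuple it is automatically a JAX pytree, so the
# code below can map over both parameters at once. A minimal sketch with
# illustrative values only:
#
#   lo, hi = GSDParams(psi=1., rho=0.), GSDParams(psi=5., rho=1.)
#   theta = GSDParams(psi=jnp.asarray(5.3), rho=jnp.asarray(0.9))
#   jtu.tree_map(jnp.clip, theta, lo, hi)  # psi is clipped back to 5.0
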
@jax.jit
def fit_moments(data: ArrayLike) -> GSDParams:
    """Fits the GSD parameters using the method of moments.

    :param data: An array of five counts, one per response category (1-5).
    :return: GSD parameters.
    """
    # Sample mean and variance of the scores 1..5, weighted by the counts.
    psi = jnp.dot(data, jnp.arange(1, 6)) / jnp.sum(data)
    V = jnp.dot(data, jnp.arange(1, 6) ** 2) / jnp.sum(data) - psi ** 2
    # rho is 0 at the maximal GSD variance for this psi and 1 at the minimal one.
    return GSDParams(psi=psi, rho=(vmax(psi) - V) / (vmax(psi) - vmin(psi)))

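# A minimal usage sketch with hypothetical counts: 20 responses spread over
# the five categories.
#
#   counts = jnp.array([0., 2., 8., 8., 2.])
#   theta0 = fit_moments(counts)  # psi == 3.5, rho from the variance bounds
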
class OptState(NamedTuple):
    """State of an optimization process.

    :param params: The current optimization parameters.
    :param previous_params: The parameters from the previous iteration.
    :param count: The step (iteration) count of the optimization process.

    This class stores the state of an optimization algorithm, keeping track of
    the current parameters, the previous parameters, and the step count.
    """
    params: GSDParams
    previous_params: GSDParams
    count: int

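
# OptState is the carry of the jax.lax.while_loop in fit_mle below: body_fun
# maps one OptState to the next, and cond_fun inspects it to decide whether
# the loop should continue.
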
@partial(jax.jit, static_argnames=("max_iterations", "num_lr"))
def fit_mle(data: ArrayLike, max_iterations: int = 100, log_lr_min: ArrayLike = -15,
            log_lr_max: ArrayLike = 2., num_lr: int = 10) -> tuple[GSDParams, OptState]:
    """Finds the maximum likelihood estimator of the GSD parameters.

    The algorithm is a simple projected gradient ascent: after each gradient
    step, psi is clipped to [1, 5] and rho to [0, 1], and the step size is
    picked by an exhaustive line search over a fixed grid of learning rates.

    :param data: An array of five counts, one per response category (1-5).
    :param max_iterations: Maximum number of iterations.
    :param log_lr_min: Log2 of the smallest learning rate.
    :param log_lr_max: Log2 of the largest learning rate.
    :param num_lr: Number of learning rates to check during the line search.

    :return: A tuple of the estimated GSD parameters and the final optimizer
        state, whose params field holds the same estimates.
    """

    def ll(theta: GSDParams) -> Array:
        # Average log-likelihood per response: log-probabilities of the five
        # categories under theta, weighted by the observed counts.
        logits = jax.vmap(log_prob, in_axes=(None, None, 0))(theta.psi, theta.rho, jnp.arange(1, 6))
        return jnp.dot(data, logits) / jnp.sum(data)

    grad_ll = jax.grad(ll)
    # Initialize from the method-of-moments estimate.
    theta0 = fit_moments(data)
    # Step-size grid for the line search; the leading zero keeps the current
    # parameters as a candidate, so an update is accepted only if some step
    # improves the likelihood.
    rate = jnp.concatenate([jnp.zeros((1,)), jnp.logspace(log_lr_min, log_lr_max, num_lr, base=2.)])

    def update(tg, t, lo, hi):
        """Takes a gradient step at every learning rate and projects the
        result back onto the feasible interval.

        :param tg: gradient leaf
        :param t: current parameter leaf
        :param lo: lower bound
        :param hi: upper bound
        :return: a vector of updated parameter candidates, one per rate
        """
        # Treat NaN gradient components as zero so a single undefined
        # direction does not poison the whole step.
        nt = t + rate * jnp.where(jnp.isnan(tg), 0., tg)
        # Projection step: clip the candidates to [lo, hi].
        _nt = jnp.where(nt < lo, lo, nt)
        _nt = jnp.where(_nt > hi, hi, _nt)
        return _nt

    # Feasible box for the parameters: psi in [1, 5] and rho in [0, 1].
    lo = GSDParams(psi=1., rho=0.)
    hi = GSDParams(psi=5., rho=1.)

    def body_fun(state: OptState) -> OptState:
        t, _, count = state
        g = grad_ll(t)
        # Candidate parameters for every learning rate, projected to the box.
        new_theta = jtu.tree_map(update, g, t, lo, hi)
        # Exhaustive line search: evaluate all candidates and keep the best
        # one, excluding NaN likelihoods.
        new_lls = jax.vmap(ll)(new_theta)
        max_idx = jnp.argmax(jnp.where(jnp.isnan(new_lls), -jnp.inf, new_lls))
        return OptState(params=jtu.tree_map(lambda leaf: leaf[max_idx], new_theta),
                        previous_params=t, count=count + 1)

    def cond_fun(state: OptState) -> Array:
        tn, tnm1, c = state
        # Stop when the parameters no longer change or the iteration budget is
        # exhausted, and also as soon as any parameter becomes NaN.
        should_stop = jnp.logical_or(jnp.all(jnp.array(tn) == jnp.array(tnm1)), c > max_iterations)
        should_stop = jnp.logical_or(should_stop, jnp.any(jnp.isnan(jnp.array(tn))))
        return jnp.logical_not(should_stop)

    opt_state = jax.lax.while_loop(
        cond_fun, body_fun,
        OptState(params=theta0,
                 previous_params=jtu.tree_map(lambda _: jnp.inf, theta0),
                 count=0))
    return opt_state.params, opt_state

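
# A minimal usage sketch with hypothetical counts:
#
#   counts = jnp.array([1., 2., 10., 5., 2.])
#   theta_hat, state = fit_mle(counts)
#   # theta_hat.psi and theta_hat.rho hold the MLE; state.count is the number
#   # of gradient-ascent iterations executed.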