@@ -149,7 +149,6 @@ def __init__(
             num_design_scale_params,
             graph: tf.Graph = None,
             batch_size=500,
-            feature_batch_size=None,
             init_a=None,
             init_b=None,
             constraints_loc=None,
@@ -163,7 +162,6 @@ def __init__(
         self.num_design_loc_params = num_design_loc_params
         self.num_design_scale_params = num_design_scale_params
         self.batch_size = batch_size
-        self.feature_batch_size = feature_batch_size

         # initial graph elements
         with self.graph.as_default():
@@ -305,11 +303,25 @@ def __init__(
             with tf.name_scope("training"):
                 global_step = tf.train.get_or_create_global_step()

-                # set up trainers for different selections of variables to train
-                # set up multiple optimization algorithms for each trainer
+                # Set up trainers for different selections of variables to train.
+                # Set up multiple optimization algorithms for each trainer.
+                # Note that params is a tf.Variable, whereas a and b are tensors
+                # (slices of that variable), so their updates are implemented differently.
                 batch_trainers = train_utils.MultiTrainer(
-                    loss=batch_model.norm_neg_log_likelihood,
-                    variables=[model_vars.params],
+                    # loss=batch_model.norm_neg_log_likelihood,  # add only selected features here TODO
+                    # variables=[model_vars.params],  # tf.gather(model_vars.params, indices=np.where(model_vars.converged == False)[0], axis=1)],
+                    gradients=[
+                        (
+                            tf.concat([
+                                tf.gradients(batch_model.norm_neg_log_likelihood,
+                                             model_vars.params_by_gene[i])[0]
+                                if i in np.where(model_vars.converged == False)[0]
+                                else tf.zeros([model_vars.params.shape[0], 1])
+                                for i in range(model_vars.params.shape[1])
+                            ], axis=1),
+                            model_vars.params
+                        ),
+                    ],
                     learning_rate=learning_rate,
                     global_step=global_step,
                     apply_gradients=lambda grad: tf.where(tf.is_nan(grad), tf.zeros_like(grad), grad),
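The new gradients argument above replaces the plain loss/variables pair: for every gene (column of params), the gradient is either computed against that gene's parameter slice or substituted by a zero column when model_vars.converged marks the gene as finished, so converged genes stop being updated. Below is a minimal, self-contained TF 1.x sketch of that masking idea; it uses one full gradient and a broadcast mask instead of the per-column tf.gradients loop in the diff, and all names (n_params, n_genes, converged) are illustrative rather than batchglm's API.

# Sketch only: zero the gradient columns of converged features so that a
# gradient step leaves those columns of params unchanged.
import numpy as np
import tensorflow as tf

n_params, n_genes = 4, 6
params = tf.Variable(tf.random_normal([n_params, n_genes]))
loss = tf.reduce_sum(tf.square(params))                # stand-in objective

converged = np.array([False, True, False, True, False, False])
mask = tf.constant((~converged).astype(np.float32))    # 1.0 where still training

grad = tf.gradients(loss, params)[0]                   # shape [n_params, n_genes]
masked_grad = grad * mask                              # broadcasts over the parameter axis
train_op = tf.train.GradientDescentOptimizer(0.1).apply_gradients([(masked_grad, params)])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    before = sess.run(params)
    sess.run(train_op)
    after = sess.run(params)
    assert np.allclose(before[:, converged], after[:, converged])  # converged genes untouched

The diff itself goes one step further: it calls tf.gradients separately for each non-converged column via model_vars.params_by_gene and concatenates the results, so no gradient is computed at all for converged slices.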
@@ -354,8 +366,20 @@ def __init__(
                 # [tf.reduce_sum(tf.abs(grad), axis=0) for (grad, var) in batch_trainers.gradient])

                 full_data_trainers = train_utils.MultiTrainer(
-                    loss=full_data_model.norm_neg_log_likelihood,
-                    variables=[model_vars.params],
+                    # loss=full_data_model.norm_neg_log_likelihood,
+                    # variables=[tf.gather(model_vars.params, indices=np.where(model_vars.converged == False)[0], axis=1)],
+                    gradients=[
+                        (
+                            tf.concat([
+                                tf.gradients(full_data_model.norm_neg_log_likelihood,
+                                             model_vars.params_by_gene[i])[0]
+                                if i in np.where(model_vars.converged == False)[0]
+                                else tf.zeros([model_vars.params.shape[0], 1])
+                                for i in range(model_vars.params.shape[1])
+                            ], axis=1),
+                            model_vars.params
+                        ),
+                    ],
                     learning_rate=learning_rate,
                     global_step=global_step,
                     apply_gradients=lambda grad: tf.where(tf.is_nan(grad), tf.zeros_like(grad), grad),
@@ -863,7 +887,7 @@ def __init__(
         init_b = init_scale

         # ### prepare fetch_fn:
-        def fetch_fn(idx_obs, idx_genes=None):
+        def fetch_fn(idx):
             r"""
             Documentation of tensorflow coding style in this function:
             tf.py_func defines a python function (the getters of the InputData object slots)
@@ -872,13 +896,8 @@ def fetch_fn(idx_obs, idx_genes=None):
             as explained below.
             """
             # Catch dimension collapse error if idx is only one element long, i.e. 0D:
-            if len(idx_obs.shape) == 0:
-                idx_obs = tf.expand_dims(idx_obs, axis=0)
-            if idx_genes is None:
-                idx_genes = ...
-            else:
-                if len(idx_genes.shape) == 0:
-                    idx_genes = tf.expand_dims(idx_genes, axis=0)
+            if len(idx.shape) == 0:
+                idx = tf.expand_dims(idx, axis=0)

             X_tensor = tf.py_func(
                 func=input_data.fetch_X,
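The fetch_fn docstring above refers to the tf.py_func pattern: a plain Python getter (here input_data.fetch_X) is wrapped as a graph operation, and because py_func cannot infer static shapes, the output shape has to be restored afterwards with set_shape. A minimal, self-contained sketch of that pattern follows; fetch_X, the array sizes, and the placeholder are illustrative stand-ins, not the project's InputData API.

# Sketch only: wrap a NumPy-backed getter as a TF 1.x graph op via tf.py_func.
import numpy as np
import tensorflow as tf

data = np.random.rand(100, 10).astype(np.float32)  # (observations, features)

def fetch_X(idx):
    # plain Python/NumPy code, executed at session run time
    return data[idx]

idx = tf.placeholder(tf.int64, shape=[None])
X_tensor = tf.py_func(func=fetch_X, inp=[idx], Tout=tf.float32, stateful=False)
# py_func drops static shape information, so it is set manually:
X_tensor.set_shape([None, data.shape[1]])

with tf.Session() as sess:
    X_batch = sess.run(X_tensor, feed_dict={idx: np.arange(5, dtype=np.int64)})
    print(X_batch.shape)  # (5, 10)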