Adds a check in sample for start kwarg shapes (#2462)

jackhansom · aseyboldt · commit 6f95e04d206d · 2017-08-01T20:37:55.000+02:00
* Adds test for shape of start argument

* Adds case for iterable of start arguments and case when there is no shape

* addresses comments to sample start check PR

* adds space between string literals

* adds return to _check_start_shape

* adds unit tests for _check_start_shape
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
@@ -1,4 +1,4 @@
-from collections import defaultdict
+from collections import defaultdict, Sequence
 
 from joblib import Parallel, delayed
 from numpy.random import randint, seed
@@ -144,7 +144,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None,
     n_init : int
         Number of iterations of initializer
         If 'ADVI', number of iterations, if 'nuts', number of draws.
-    start : dict
+    start : dict, or array of dicts
         Starting point in parameter space (or partial point)
         Defaults to trace.point(-1)) if there is a trace provided and
         model.test_point if not (defaults to empty dict).
@@ -227,6 +227,9 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None,
     """
     model = modelcontext(model)
 
+    if start is not None:
+        _check_start_shape(model, start)
+
     draws += tune
 
     if nuts_kwargs is not None:
@@ -280,6 +283,38 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None,
     return sample_func(**sample_args)[discard:]
 
 
+def _check_start_shape(model, start):
+    """Check `start` (a dict, or array-like of dicts) against var shapes.
+
+    Raises TypeError for any other type, ValueError listing each mismatch.
+    """
+    # A str is a Sequence of 1-char strs, so recursing into one never ends.
+    if isinstance(start, (Sequence, np.ndarray)) \
+            and not isinstance(start, str):
+        for start_iter in start:
+            _check_start_shape(model, start_iter)
+        return
+    if not isinstance(start, dict):
+        raise TypeError("start argument must be a dict "
+                        "or an array-like of dicts")
+    e = ''
+    for var in model.vars:
+        if var.name not in start:
+            continue
+        # Plain tuple: an ndarray shape cannot be truth-tested reliably.
+        var_shape = tuple(int(dim) for dim in var.shape.tag.test_value)
+        start_var_shape = np.shape(start[var.name])
+        if start_var_shape:
+            if var_shape != start_var_shape:
+                e += "\nExpected shape {} for var '{}', got: {}".format(
+                    var_shape, var.name, start_var_shape)
+        elif var_shape:  # scalar given for a variable that has a shape
+            e += "\nExpected shape {} for var '{}', got scalar {}".format(
+                var_shape, var.name, start[var.name])
+    if e != '':
+        raise ValueError("Bad shape for start argument:{}".format(e))
+
+
 def _sample(draws, step=None, start=None, trace=None, chain=0, tune=None,
             progressbar=True, model=None, random_seed=-1, live_plot=False,
             live_plot_kwargs=None, **kwargs):
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
@@ -110,6 +110,27 @@ def test_sample_tune_len(self):
             trace = pm.sample(draws=100, tune=50, njobs=4)
             assert len(trace) == 100
 
+    @pytest.mark.parametrize('start, error', [
+        ([1, 2], TypeError),
+        ({'x': 1}, ValueError),
+        ({'x': [1, 2, 3]}, ValueError),
+        ({'x': np.array([[1, 1], [1, 1]])}, ValueError),
+    ])
+    def test_sample_start_bad_shape(self, start, error):
+        """Each bad `start` makes the check raise the paired exception."""
+        # NOTE(review): presumably 'x' in self.model has shape (2,) - confirm.
+        with pytest.raises(error):
+            pm.sampling._check_start_shape(self.model, start)
+
+    @pytest.mark.parametrize('start', [
+        {'x': np.array([1, 1])},
+        [{'x': [10, 10]}, {'x': [-10, -10]}],
+    ])
+    def test_sample_start_good_shape(self, start):
+        """A single dict and a list of dicts both pass the shape check."""
+        # No assertion needed: the check signals failure by raising.
+        pm.sampling._check_start_shape(self.model, start)
+
 
 def test_empty_model():
     with pm.Model():