Merge pull request #148 from statlab/doc-fix

pbstark · web-flow · commit 2ff8f4861fea · 2019-08-07T12:10:38.000-07:00
DOC: fix documentation of calling signature for test statistic in core
diff --git a/permute/core.py b/permute/core.py
@@ -196,16 +196,15 @@ def two_sample(x, y, reps=10**5, stat='mean', alternative="greater",
             approximating the permutation distribution.
             The t-statistic is computed using scipy.stats.ttest_ind
         (c) If stat is a function (a callable object), the test statistic is
-            that function.  The function should take two arguments:
+            that function. The function should take two arguments:
             given a permutation of the pooled data, the first argument is the
             "new" x and the second argument is the "new" y.
             For instance, if the test statistic is the Kolmogorov-Smirnov distance 
             between the empirical distributions of the two samples, 
-            $\max_t |F_x(t) - F_y(t)|$,
-            the test statistic could be written:
+            $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written:
 
             f = lambda u, v: np.max( \
-                [abs(sum(u<=v)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\
+                [abs(sum(u<=val)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\
                 )
 
     alternative : {'greater', 'less', 'two-sided'}
@@ -299,16 +298,17 @@ def two_sample_shift(x, y, reps=10**5, stat='mean', alternative="greater",
             approximating the permutation distribution.
             The t-statistic is computed using scipy.stats.ttest_ind
         (c) If stat is a function (a callable object), the test statistic is
-            that function.  The function should take a permutation of the pooled
-            data and compute the test function from it. For instance, if the
-            test statistic is the Kolmogorov-Smirnov distance between the
-            empirical distributions of the two samples, $\max_t |F_x(t) - F_y(t)|$,
-            the test statistic could be written:
+            that function. The function should take two arguments:
+            given a permutation of the pooled data, the first argument is the
+            "new" x and the second argument is the "new" y.
+            For instance, if the test statistic is the Kolmogorov-Smirnov distance
+            between the empirical distributions of the two samples,
+            $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written:
 
-            f = lambda u: np.max( \
-                [abs(sum(u[:len(x)]<=v)/len(x)-sum(u[len(x):]<=v)/len(y)) for v in u]\
+            f = lambda u, v: np.max( \
+                [abs(sum(u<=val)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\
                 )
-
+                
     alternative : {'greater', 'less', 'two-sided'}
         The alternative hypothesis to test
     keep_dist : bool
@@ -414,15 +414,17 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None,
             approximating the permutation distribution.
             The t-statistic is computed using scipy.stats.ttest_ind
         (c) If stat is a function (a callable object), the test statistic is
-            that function.  The function should take a permutation of the pooled
-            data and compute the test function from it. For instance, if the
-            test statistic is the Kolmogorov-Smirnov distance between the
-            empirical distributions of the two samples, $\max_t |F_x(t) - F_y(t)|$,
-            the test statistic could be written:
+            that function. The function should take two arguments:
+            given a permutation of the pooled data, the first argument is the
+            "new" x and the second argument is the "new" y.
+            For instance, if the test statistic is the Kolmogorov-Smirnov distance
+            between the empirical distributions of the two samples,
+            $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written:
 
-            f = lambda u: np.max( \
-                [abs(sum(u[:len(x)]<=v)/len(x)-sum(u[len(x):]<=v)/len(y)) for v in u]\
+            f = lambda u, v: np.max( \
+                [abs(sum(u<=val)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\
                 )
+
     shift : float
         The relationship between x and y under the null hypothesis.