Validate min_periods against the window in the rolling-moment kernels.

_check_minp() moves to the top of moments.pyx and takes the window as its
first argument: it raises ValueError when min_periods exceeds the window,
maps min_periods > N to N + 1, maps 0 to 1, and rejects negative values.
Every caller touched below passes its window through.  roll_max2 and
roll_min2 stop rejecting a window longer than the input; they validate
min_periods against the requested window and then clip the window to the
array length.

Review note: roll_min2 calls _check_minp() before clipping the window, the
same order roll_max2 uses, so min_periods is compared with the window the
caller asked for rather than with the clipped one.

Review note: line breaks and indentation in this patch were reconstructed
from the hunk line counts.  Spacing inside context lines may not match the
target file exactly, so a whitespace-tolerant apply may be required.

diff --git a/pandas/src/moments.pyx b/pandas/src/moments.pyx
index 5ec264b6a5b87..aee9935b2fcb6 100644
--- a/pandas/src/moments.pyx
+++ b/pandas/src/moments.pyx
@@ -9,6 +9,18 @@
 #
 # -
 
+def _check_minp(win, minp, N):
+    if minp > win:
+        raise ValueError('min_periods %d must be <= window %d'
+                         % (minp, win))
+    elif minp > N:
+        minp = N + 1
+    elif minp == 0:
+        minp = 1
+    elif minp < 0:
+        raise ValueError('min_periods must be >= 0')
+    return minp
+
 # original C implementation by N. Devillard.
 # This code in public domain.
 # Function : kth_smallest()
@@ -24,7 +36,6 @@
 # Physical description: 366 p.
 # Series: Prentice-Hall Series in Automatic Computation
 
-
 def kth_smallest(ndarray[double_t] a, Py_ssize_t k):
     cdef:
         Py_ssize_t i,j,l,m,n
@@ -149,7 +160,7 @@ def roll_sum(ndarray[double_t] input, int win, int minp):
 
     cdef ndarray[double_t] output = np.empty(N, dtype=float)
 
-    minp = _check_minp(minp, N)
+    minp = _check_minp(win, minp, N)
 
     for i from 0 <= i < minp - 1:
         val = input[i]
@@ -192,7 +203,7 @@ def roll_mean(ndarray[double_t] input,
 
     cdef ndarray[double_t] output = np.empty(N, dtype=float)
 
-    minp = _check_minp(minp, N)
+    minp = _check_minp(win, minp, N)
 
     for i from 0 <= i < minp - 1:
         val = input[i]
@@ -335,15 +346,6 @@ def nancorr(ndarray[float64_t, ndim=2] mat, cov=False):
 #----------------------------------------------------------------------
 # Rolling variance
 
-def _check_minp(minp, N):
-    if minp > N:
-        minp = N + 1
-    elif minp == 0:
-        minp = 1
-    elif minp < 0:
-        raise ValueError('min_periods must be >= 0')
-    return minp
-
 def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
     cdef double val, prev, sum_x = 0, sum_xx = 0, nobs = 0
     cdef Py_ssize_t i
@@ -351,7 +353,7 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
 
     cdef ndarray[double_t] output = np.empty(N, dtype=float)
 
-    minp = _check_minp(minp, N)
+    minp = _check_minp(win, minp, N)
 
     for i from 0 <= i < minp - 1:
         val = input[i]
@@ -400,7 +402,7 @@ def roll_skew(ndarray[double_t] input, int win, int minp):
     # 3 components of the skewness equation
     cdef double A, B, C, R
 
-    minp = _check_minp(minp, N)
+    minp = _check_minp(win, minp, N)
 
     for i from 0 <= i < minp - 1:
         val = input[i]
@@ -462,7 +464,7 @@ def roll_kurt(ndarray[double_t] input,
     # 5 components of the kurtosis equation
     cdef double A, B, C, D, R, K
 
-    minp = _check_minp(minp, N)
+    minp = _check_minp(win, minp, N)
 
     for i from 0 <= i < minp - 1:
         val = input[i]
@@ -533,7 +535,7 @@ cdef _roll_skiplist_op(ndarray arg, int win, int minp, skiplist_f op):
 
     skiplist = IndexableSkiplist(win)
 
-    minp = _check_minp(minp, N)
+    minp = _check_minp(win, minp, N)
 
     for i from 0 <= i < minp - 1:
         val = input[i]
@@ -578,7 +580,7 @@ def roll_median_c(ndarray[float64_t] arg, int win, int minp):
 
     sl = skiplist_init(win)
 
-    minp = _check_minp(minp, N)
+    minp = _check_minp(win, minp, N)
 
     for i from 0 <= i < minp - 1:
         val = arg[i]
@@ -672,11 +674,17 @@ def roll_max2(ndarray[float64_t] a, int window, int minp):
     cdef np.ndarray[np.float64_t, ndim=1] y = PyArray_EMPTY(1, dims,
                                                              NPY_float64, 0)
 
-    minp = _check_minp(minp, n0)
+    if window < 1:
+        raise ValueError('Invalid window size %d'
+                         % (window))
 
-    if (window < 1) or (window > n0):
-        raise ValueError('Invalid window size %d for len %d array'
-                         % (window, n0))
+    if minp > window:
+        raise ValueError('Invalid min_periods size %d greater than window %d'
+                         % (minp, window))
+
+    minp = _check_minp(window, minp, n0)
+
+    window = min(window, n0)
 
     ring = stdlib.malloc(window * sizeof(pairs))
     end = ring + window
@@ -766,11 +774,18 @@ def roll_min2(np.ndarray[np.float64_t, ndim=1] a, int window, int minp):
     cdef np.npy_intp *dims = [n0]
     cdef np.ndarray[np.float64_t, ndim=1] y = PyArray_EMPTY(1, dims,
                                                              NPY_float64, 0)
-    if (window < 1) or (window > n0):
-        raise ValueError('Invalid window size %d for len %d array'
-                         % (window, n0))
 
-    minp = _check_minp(minp, n0)
+    if window < 1:
+        raise ValueError('Invalid window size %d'
+                         % (window))
+
+    if minp > window:
+        raise ValueError('Invalid min_periods size %d greater than window %d'
+                         % (minp, window))
+
+    minp = _check_minp(window, minp, n0)
+
+    window = min(window, n0)
 
     ring = stdlib.malloc(window * sizeof(pairs))
     end = ring + window
@@ -843,7 +858,7 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int win,
 
     skiplist = IndexableSkiplist(win)
 
-    minp = _check_minp(minp, N)
+    minp = _check_minp(win, minp, N)
 
     for i from 0 <= i < minp - 1:
         val = input[i]
@@ -892,7 +907,7 @@ def roll_generic(ndarray[float64_t, cast=True] input, int win,
     if n == 0:
         return input
 
-    minp = _check_minp(minp, n)
+    minp = _check_minp(win, minp, n)
     output = np.empty(n, dtype=float)
     counts = roll_sum(np.isfinite(input).astype(float), win, minp)
 
diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py
index 275ce4c47f6c3..8ea145c7d40f6 100644
--- a/pandas/stats/tests/test_moments.py
+++ b/pandas/stats/tests/test_moments.py
@@ -50,9 +50,21 @@ def test_rolling_median(self):
     def test_rolling_min(self):
         self._check_moment_func(mom.rolling_min, np.min)
 
+        a = np.array([1,2,3,4,5])
+        b = mom.rolling_min(a, window=100, min_periods=1)
+        assert_almost_equal(b, np.ones(len(a)))
+
+        self.assertRaises(ValueError, mom.rolling_min, np.array([1,2,3]), window=3, min_periods=5)
+
     def test_rolling_max(self):
         self._check_moment_func(mom.rolling_max, np.max)
 
+        a = np.array([1,2,3,4,5])
+        b = mom.rolling_max(a, window=100, min_periods=1)
+        assert_almost_equal(a, b)
+
+        self.assertRaises(ValueError, mom.rolling_max, np.array([1,2,3]), window=3, min_periods=5)
+
     def test_rolling_quantile(self):
         qs = [.1, .5, .9]
 