Add interpolation options to moving quantile

kornilova203 · kornilova203 · commit 51f715b2503f · 2018-04-23T14:29:15.000+03:00
diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx
@@ -1356,26 +1356,53 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp,
     # print("output: {0}".format(output))
     return output
 
+def _get_interpolation_id(str interpolation):
+    """
+    Map an interpolation name to the integer id used by roll_quantile.
+
+    The names 'linear', 'lower', 'higher', 'nearest' and 'midpoint' map to
+    0, 1, 2, 3 and 4 respectively, matching the `interpolation_id` branches
+    in roll_quantile. Any other value raises ValueError.
+    """
+    if interpolation == 'linear':
+        return 0
+    elif interpolation == 'lower':
+        return 1
+    elif interpolation == 'higher':
+        return 2
+    elif interpolation == 'nearest':
+        return 3
+    elif interpolation == 'midpoint':
+        return 4
+    else:
+        raise ValueError("Interpolation {} is not supported"
+                         .format(interpolation))
+
 
 def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,
                   int64_t minp, object index, object closed,
-                  double quantile):
+                  double quantile, str interpolation):
     """
     O(N log(window)) implementation using skip list
     """
     cdef:
-        double val, prev, midpoint
+        double val, prev, midpoint, idx_with_fraction
         IndexableSkiplist skiplist
         int64_t nobs = 0, i, j, s, e, N
         Py_ssize_t idx
         bint is_variable
         ndarray[int64_t] start, end
         ndarray[double_t] output
         double vlow, vhigh
+        int interpolation_id
 
     if quantile <= 0.0 or quantile >= 1.0:
         raise ValueError("quantile value {0} not in [0, 1]".format(quantile))
 
+    # Map the interpolation name to an integer id once, up front, so the loop
+    # below compares ints, not strings (a callback was tried and was slower).
+    interpolation_id = _get_interpolation_id(interpolation)
+
     # we use the Fixed/Variable Indexer here as the
     # actual skiplist ops outweigh any window computation costs
     start, end, N, win, minp, is_variable = get_window_indexer(
@@ -1414,18 +1441,31 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,
                     skiplist.insert(val)
 
         if nobs >= minp:
-            idx = int(quantile * <double>(nobs - 1))
-
-            # Single value in skip list
             if nobs == 1:
+                # Single value in skip list
                 output[i] = skiplist.get(0)
-
-            # Interpolated quantile
             else:
-                vlow = skiplist.get(idx)
-                vhigh = skiplist.get(idx + 1)
-                output[i] = ((vlow + (vhigh - vlow) *
-                             (quantile * (nobs - 1) - idx)))
+                idx_with_fraction = quantile * <double> (nobs - 1)
+                idx = int(idx_with_fraction)
+
+                if interpolation_id == 0:  # linear
+                    vlow = skiplist.get(idx)
+                    vhigh = skiplist.get(idx + 1)
+                    output[i] = ((vlow + (vhigh - vlow) *
+                        (idx_with_fraction - idx)))
+                elif interpolation_id == 1:  # lower
+                    output[i] = skiplist.get(idx)
+                elif interpolation_id == 2:  # higher
+                    output[i] = skiplist.get(idx + 1)
+                elif interpolation_id == 3:  # nearest
+                    if idx_with_fraction - idx < 0.5:
+                        output[i] = skiplist.get(idx)
+                    else:
+                        output[i] = skiplist.get(idx + 1)
+                elif interpolation_id == 4:  # midpoint
+                    vlow = skiplist.get(idx)
+                    vhigh = skiplist.get(idx + 1)
+                    output[i] = <double> (vlow + vhigh) / 2
         else:
             output[i] = NaN
 
diff --git a/pandas/core/window.py b/pandas/core/window.py
@@ -1246,9 +1246,21 @@ def kurt(self, **kwargs):
     Parameters
     ----------
     quantile : float
-        0 <= quantile <= 1""")
+        0 <= quantile <= 1
+    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+        .. versionadded:: 0.23.0
 
-    def quantile(self, quantile, **kwargs):
+        This optional parameter specifies the interpolation method to use,
+        when the desired quantile lies between two data points `i` and `j`:
+
+            * linear: `i + (j - i) * fraction`, where `fraction` is the
+              fractional part of the index surrounded by `i` and `j`.
+            * lower: `i`.
+            * higher: `j`.
+            * nearest: `i` or `j` whichever is nearest.
+            * midpoint: (`i` + `j`) / 2.""")
+
+    def quantile(self, quantile, interpolation='linear', **kwargs):
         window = self._get_window()
         index, indexi = self._get_index()
 
@@ -1262,7 +1274,8 @@ def f(arg, *args, **kwargs):
                                         self.closed)
             else:
                 return _window.roll_quantile(arg, window, minp, indexi,
-                                             self.closed, quantile)
+                                             self.closed, quantile,
+                                             interpolation)
 
         return self._apply(f, 'quantile', quantile=quantile,
                            **kwargs)
@@ -1582,8 +1595,10 @@ def kurt(self, **kwargs):
     @Substitution(name='rolling')
     @Appender(_doc_template)
     @Appender(_shared_docs['quantile'])
-    def quantile(self, quantile, **kwargs):
-        return super(Rolling, self).quantile(quantile=quantile, **kwargs)
+    def quantile(self, quantile, interpolation='linear', **kwargs):
+        return super(Rolling, self).quantile(quantile=quantile,
+                                             interpolation=interpolation,
+                                             **kwargs)
 
     @Substitution(name='rolling')
     @Appender(_doc_template)
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
@@ -1135,7 +1135,22 @@ def test_rolling_quantile_series(self):
         s = Series(arr)
         q1 = s.quantile(0.1)
         q2 = s.rolling(100).quantile(0.1).iloc[-1]
+        tm.assert_almost_equal(q1, q2)
+
+        q1 = s.quantile(0.1, interpolation='lower')
+        q2 = s.rolling(100).quantile(0.1, interpolation='lower').iloc[-1]
+        tm.assert_almost_equal(q1, q2)
+
+        q1 = s.quantile(0.1, interpolation='higher')
+        q2 = s.rolling(100).quantile(0.1, interpolation='higher').iloc[-1]
+        tm.assert_almost_equal(q1, q2)
+
+        q1 = s.quantile(0.1, interpolation='nearest')
+        q2 = s.rolling(100).quantile(0.1, interpolation='nearest').iloc[-1]
+        tm.assert_almost_equal(q1, q2)
 
+        q1 = s.quantile(0.1, interpolation='midpoint')
+        q2 = s.rolling(100).quantile(0.1, interpolation='midpoint').iloc[-1]
         tm.assert_almost_equal(q1, q2)
 
     def test_rolling_quantile_param(self):