
Commit 5135b60

πŸ› Small bugfixes
2 parents 3d8f6af + e9a5c1b commit 5135b60

File tree: 6 files changed (+61, -41 lines changed)

plotly_resampler/__init__.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -11,7 +11,7 @@
 
 __docformat__ = "numpy"
 __author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost"
-__version__ = "0.5.0"
+__version__ = "0.6.3"
 
 __all__ = [
     "__version__",
```

plotly_resampler/aggregation/aggregation_interface.py

Lines changed: 12 additions & 9 deletions

```diff
@@ -54,7 +54,9 @@ def _supports_dtype(self, s: pd.Series):
     @staticmethod
     def _calc_med_diff(s: pd.Series) -> Tuple[float, np.ndarray]:
         # ----- divide and conquer heuristic to calculate the median diff ------
-        s_idx_diff = np.diff(s.index.values)  # remark: s_idx_diff.shape === len(s) -1
+        # remark: thanks to the prepend -> s_idx_diff.shape === len(s)
+        siv = s.index.values
+        s_idx_diff = np.diff(s.index.values, prepend=siv[0])
 
         # To do so - use a quantile-based (median) approach where we reshape the data
         # into `n_blocks` blocks and calculate the min
@@ -66,9 +68,11 @@ def _calc_med_diff(s: pd.Series) -> Tuple[float, np.ndarray]:
             sid_v: np.ndarray = s_idx_diff[: blck_size * n_blcks].reshape(n_blcks, -1)
 
             # calculate the min and max and calculate the median on that
-            med_diff = np.median(np.concatenate((sid_v.min(axis=0), sid_v.max(axis=0))))
+            med_diff = np.quantile(
+                np.concatenate((sid_v.min(axis=0), sid_v.max(axis=0))), q=0.55
+            )
         else:
-            med_diff = np.median(s_idx_diff)
+            med_diff = np.quantile(s_idx_diff, q=0.55)
 
         return med_diff, s_idx_diff
```
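Two things change here: `prepend=siv[0]` makes the diff array exactly `len(s)` long (its first entry becomes zero), so boolean masks computed from it align one-to-one with `s`; and the plain median is replaced by the 0.55-quantile, which nudges the spacing estimate slightly upward and so makes the `3 *` gap threshold a bit more tolerant of sampling jitter. A minimal, self-contained sketch of the heuristic; the data and the `n_blcks` value are invented for illustration:

```python
import numpy as np
import pandas as pd

# ~1 Hz samples with one large gap in the middle (toy data).
idx = np.concatenate([np.arange(0, 500), np.arange(10_000, 10_500)]).astype(float)
s = pd.Series(np.random.randn(idx.size), index=idx)

siv = s.index.values
# prepend the first index value -> the diff array has the same length as s
# (its first entry is 0), so masks computed on it align with s directly.
s_idx_diff = np.diff(siv, prepend=siv[0])

n_blcks = 128  # invented block count; the library picks its own value
if s_idx_diff.size > n_blcks:
    blck_size = s_idx_diff.size // n_blcks
    # Divide and conquer: reshape into blocks, keep the per-column min & max
    # across blocks, then take a quantile of those extremes instead of
    # sorting the full diff array.
    sid_v = s_idx_diff[: blck_size * n_blcks].reshape(n_blcks, -1)
    med_diff = np.quantile(
        np.concatenate((sid_v.min(axis=0), sid_v.max(axis=0))), q=0.55
    )
else:
    med_diff = np.quantile(s_idx_diff, q=0.55)

print(med_diff)  # ~1.0: the dominant sample spacing, despite the gap
```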

```diff
@@ -77,17 +81,17 @@ def _insert_gap_none(self, s: pd.Series) -> pd.Series:
         med_diff, s_idx_diff = self._calc_med_diff(s)
         # add None data-points in-between the gaps
         if med_diff is not None:
-            df_gap_idx = s.index.values[1:][s_idx_diff > 3 * med_diff]
+            df_gap_idx = s.index.values[s_idx_diff > 3 * med_diff]
             if len(df_gap_idx):
                 df_res_gap = pd.Series(
                     index=df_gap_idx, data=None, name=s.name, copy=False
                 )
 
                 if isinstance(df_res_gap.index, pd.DatetimeIndex):
-                    # Due to the s.index`.values` cast, df_res_gap has lost
+                    # Due to the s.index`.values` cast, df_res_gap has lost
                     # time-information, so now we restore it
-                    df_res_gap.index = (
-                        df_res_gap.index.tz_localize('UTC').tz_convert(s.index.tz)
+                    df_res_gap.index = df_res_gap.index.tz_localize("UTC").tz_convert(
+                        s.index.tz
                     )
 
             # Note:
@@ -104,8 +108,7 @@ def _replace_gap_end_none(self, s: pd.Series) -> pd.Series:
         med_diff, s_idx_diff = self._calc_med_diff(s)
         if med_diff is not None:
             # Replace data-points with None where the gaps occur
-            s.iloc[1:].loc[s_idx_diff > 3 * med_diff] = None
-
+            s.loc[s_idx_diff > 3 * med_diff] = None
         return s
 
     def aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
```
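Because the mask now has the same length as `s`, both call sites drop their off-by-one `[1:]` slicing. The old `s.iloc[1:].loc[...] = None` was also a chained-indexing assignment, which pandas may apply to a temporary copy rather than to `s` itself; the single-step `s.loc[...] = None` sidesteps that. A toy illustration with invented data:

```python
import numpy as np
import pandas as pd

s = pd.Series(
    [1.0, 2.0, 3.0, 4.0],
    index=np.array([0.0, 1.0, 2.0, 50.0]),  # large jump between 2.0 and 50.0
)

siv = s.index.values
s_idx_diff = np.diff(siv, prepend=siv[0])    # len(s_idx_diff) == len(s)
med_diff = np.quantile(s_idx_diff, q=0.55)   # ~1.0 here

# The mask aligns 1:1 with s; the sample *after* the gap is flagged.
gap_mask = s_idx_diff > 3 * med_diff
print(s.index.values[gap_mask])              # [50.] -> _insert_gap_none target

# _replace_gap_end_none equivalent: blank out the first sample after a gap.
s.loc[gap_mask] = None
```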

plotly_resampler/aggregation/aggregators.py

Lines changed: 40 additions & 18 deletions

```diff
@@ -44,7 +44,10 @@ class LTTB(AbstractSeriesAggregator):
 
     """
 
-    def __init__(self, interleave_gaps: bool = True, ):
+    def __init__(
+        self,
+        interleave_gaps: bool = True,
+    ):
         """
         Parameters
         ----------
@@ -56,7 +59,7 @@ def __init__(self, interleave_gaps: bool = True, ):
         super().__init__(
             interleave_gaps,
             dtype_regex_list=[rf"{dtype}\d*" for dtype in ["float", "int", "uint"]]
-            + ["category", "bool"],
+            + ["category", "bool"],
         )
 
     def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
@@ -66,11 +69,11 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
         s_i = s.index.values
 
         if s_i.dtype.type == np.datetime64:
-            # lttbc does not support this datatype -> convert to int
+            # lttbc does not support this datatype -> convert to int
             # (where the time is represented in ns)
             s_i = s_i.astype(int)
             idx, data = lttbc.downsample(s_i, s_v, n_out)
-            idx = pd.to_datetime(idx, unit='ns', utc=True).tz_convert(s.index.tz)
+            idx = pd.to_datetime(idx, unit="ns", utc=True).tz_convert(s.index.tz)
         else:
             idx, data = lttbc.downsample(s_i, s_v, n_out)
             idx = idx.astype(s_i.dtype)
```
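`lttbc.downsample` operates on plain numeric arrays, so a `datetime64` axis is passed through as integer nanoseconds; the `utc=True` plus `tz_convert(s.index.tz)` pair then restores the timezone that the `.values` cast drops. The round trip in isolation (no lttbc call is needed to see it; the explicit `np.int64` is ours, the hunk uses plain `int`):

```python
import numpy as np
import pandas as pd

rng = pd.date_range("2022-01-01", periods=5, freq="1s", tz="Europe/Brussels")
s = pd.Series(np.arange(5.0), index=rng)

s_i = s.index.values                 # datetime64[ns]; tz information is gone
assert s_i.dtype.type == np.datetime64

as_int = s_i.astype(np.int64)        # nanoseconds since the epoch (UTC)
# ... this int axis is what would be handed to lttbc.downsample(...)

# Restore: interpret the ints as UTC nanoseconds, then convert back.
restored = pd.to_datetime(as_int, unit="ns", utc=True).tz_convert(s.index.tz)
assert (restored == s.index).all()
```
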
```diff
@@ -129,12 +132,14 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
         # add the corresponding offset
         argmin = (
             s[: block_size * offset.shape[0]]
-            .values.reshape(-1, block_size).argmin(axis=1)
+            .values.reshape(-1, block_size)
+            .argmin(axis=1)
             + offset
         )
         argmax = (
-            s[argmax_offset: block_size * offset.shape[0] + argmax_offset]
-            .values.reshape(-1, block_size).argmax(axis=1)
+            s[argmax_offset : block_size * offset.shape[0] + argmax_offset]
+            .values.reshape(-1, block_size)
+            .argmax(axis=1)
             + offset
             + argmax_offset
         )
@@ -150,7 +155,7 @@ class MinMaxAggregator(AbstractSeriesAggregator):
     .. note::
         This method is rather efficient when scaling to large data sizes and can be used
         as a data-reduction step before feeding it to the :class:`LTTB <LTTB>`
-        algorithm, as :class:`EfficientLTTB <EfficientLTTB>` does with the
+        algorithm, as :class:`EfficientLTTB <EfficientLTTB>` does with the
         :class:`MinMaxOverlapAggregator <MinMaxOverlapAggregator>`.
 
     """
@@ -173,22 +178,35 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
         block_size = math.ceil(s.shape[0] / n_out * 2)
 
         # Calculate the offset range which will be added to the argmin and argmax pos
-        offset = np.arange(
-            0, stop=s.shape[0] - block_size, step=block_size
-        )
+        offset = np.arange(0, stop=s.shape[0] - block_size, step=block_size)
 
         # Calculate the argmin & argmax on the reshaped view of `s` &
         # add the corresponding offset
         argmin = (
             s[: block_size * offset.shape[0]]
-            .values.reshape(-1, block_size).argmin(axis=1)
+            .values.reshape(-1, block_size)
+            .argmin(axis=1)
             + offset
         )
         argmax = (
             s[: block_size * offset.shape[0]]
-            .values.reshape(-1, block_size).argmax(axis=1)
+            .values.reshape(-1, block_size)
+            .argmax(axis=1)
             + offset
         )
+
+        # Note: the implementation below flips the array to search from
+        # right-to-left (as min or max will always use the first same minimum
+        # item, i.e. the most left item)
+        # This however creates a large computational overhead -> we do not use
+        # this implementation and suggest using the minmaxaggregator.
+        # argmax = (
+        #     (block_size - 1)
+        #     - np.fliplr(
+        #         s[: block_size * offset.shape[0]].values.reshape(-1, block_size)
+        #     ).argmax(axis=1)
+        # ) + offset
+
         # Sort the argmin & argmax (where we append the first and last index item)
         # and then slice the original series on these indexes.
         return s.iloc[np.unique(np.concatenate((argmin, argmax, [0, s.shape[0] - 1])))]
```
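The reshape trick computes one argmin and one argmax per block in a single vectorized pass, and adding `offset` maps the per-block positions back onto `s`. The commented-out `fliplr` variant exists because `argmax` always returns the first (left-most) occurrence of a tied extremum; flipping returns the right-most one, at a noticeable cost. A sketch with invented sizes:

```python
import math

import numpy as np
import pandas as pd

s = pd.Series(np.random.randn(1_000))
n_out = 100
block_size = math.ceil(s.shape[0] / n_out * 2)                # 20
offset = np.arange(0, stop=s.shape[0] - block_size, step=block_size)

# One argmin/argmax per block in a single vectorized pass; `offset`
# maps the per-block positions back onto positions within `s`.
blocks = s[: block_size * offset.shape[0]].values.reshape(-1, block_size)
argmin = blocks.argmin(axis=1) + offset
argmax = blocks.argmax(axis=1) + offset

# Keep the first & last sample, deduplicate, sort, and slice.
out = s.iloc[np.unique(np.concatenate((argmin, argmax, [0, s.shape[0] - 1])))]

# Tie-breaking, the reason for the commented-out fliplr variant:
# argmax returns the left-most maximum; flipping yields the right-most.
row = np.array([[1, 5, 5, 2]])
print(row.argmax(axis=1))                                     # [1]
print((row.shape[1] - 1) - np.fliplr(row).argmax(axis=1))     # [2]
```
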
```diff
@@ -209,14 +227,18 @@ def __init__(self, interleave_gaps: bool = True):
             sampled data. A quantile-based approach is used to determine the gaps /
             irregularly sampled data. By default, True.
         """
-        self.lttb = LTTB(interleave_gaps=interleave_gaps)
-        self.minmax = MinMaxOverlapAggregator(interleave_gaps=interleave_gaps)
-        super().__init__(interleave_gaps, dtype_regex_list=None)
+        self.lttb = LTTB(interleave_gaps=False)
+        self.minmax = MinMaxOverlapAggregator(interleave_gaps=False)
+        super().__init__(
+            interleave_gaps,
+            dtype_regex_list=[rf"{dtype}\d*" for dtype in ["float", "int", "uint"]]
+            + ["category", "bool"],
+        )
 
     def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
         if s.shape[0] > n_out * 1_000:
             s = self.minmax._aggregate(s, n_out * 50)
-        return self.lttb.aggregate(s, n_out)
+        return self.lttb._aggregate(s, n_out)
 
 
 class EveryNthPoint(AbstractSeriesAggregator):
```
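Two fixes land in this hunk: the inner aggregators are now built with `interleave_gaps=False`, so gap interleaving and the dtype check happen exactly once in the outer wrapper (hence the added `dtype_regex_list`), and the inner call becomes `_aggregate` instead of the public `aggregate`, avoiding a second round of that processing. The two-stage strategy itself, sketched with a hypothetical stand-in aggregator (not the library classes):

```python
import numpy as np
import pandas as pd

# Hypothetical stand-in for both aggregators (NOT the library classes):
# any object exposing _aggregate(s, n_out) would do here.
class _StridedAggregator:
    def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
        step = max(1, s.shape[0] // n_out)
        return s.iloc[::step]

minmax = _StridedAggregator()
lttb = _StridedAggregator()

def efficient_aggregate(s: pd.Series, n_out: int) -> pd.Series:
    # Only prefilter when the series dwarfs the target: a cheap min/max-style
    # pass first shrinks the input, then the heavier LTTB-style pass runs.
    if s.shape[0] > n_out * 1_000:
        s = minmax._aggregate(s, n_out * 50)
    # The private _aggregate is called so gap interleaving / dtype checks
    # are not applied twice; the outer public aggregate() does them once.
    return lttb._aggregate(s, n_out)

out = efficient_aggregate(pd.Series(np.random.randn(2_000_000)), n_out=1_000)
assert out.shape[0] <= 50_000
```
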
```diff
@@ -249,7 +271,7 @@ class FuncAggregator(AbstractSeriesAggregator):
     """
 
     def __init__(
-        self, aggregation_func, interleave_gaps: bool = True, dtype_regex_list=None
+        self, aggregation_func, interleave_gaps: bool = True, dtype_regex_list=None
     ):
         """
         Parameters
```

plotly_resampler/figure_resampler/figure_resampler_interface.py

Lines changed: 6 additions & 11 deletions

```diff
@@ -602,20 +602,15 @@ def add_trace(
         trace.uid = uuid
 
         hf_x = (
-            trace["x"]
-            if hasattr(trace, "x") and hf_x is None
-            else hf_x.values
-            if isinstance(hf_x, pd.Series)
-            else hf_x
+            trace["x"] if hasattr(trace, "x") and hf_x is None
+            else hf_x.values if isinstance(hf_x, pd.Series)
+            else hf_x if isinstance(hf_x, pd.Index)
+            else np.asarray(hf_x)
         )
-        if isinstance(hf_x, tuple):
-            hf_x = list(hf_x)
 
         hf_y = (
-            trace["y"]
-            if hasattr(trace, "y") and hf_y is None
-            else hf_y.values
-            if isinstance(hf_y, pd.Series)
+            trace["y"] if hasattr(trace, "y") and hf_y is None
+            else hf_y.values if isinstance(hf_y, (pd.Series, pd.Index))
             else hf_y
         )
         hf_y = np.asarray(hf_y)
```

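The rewritten conditional chain folds the old tuple special case into one expression: a `pd.Series` is unwrapped to its values, a `pd.Index` passes through untouched (it may carry timezone information), and anything else, tuples included, goes through `np.asarray`. The same normalization as a standalone helper; the function name is ours, and the `trace["x"]` fallback branch is omitted:

```python
import numpy as np
import pandas as pd

def normalize_hf_x(hf_x):
    """Hypothetical helper mirroring the hf_x chain above."""
    if isinstance(hf_x, pd.Series):
        return hf_x.values           # unwrap to the underlying ndarray
    if isinstance(hf_x, pd.Index):
        return hf_x                  # keep the Index as-is (dtype, tz intact)
    return np.asarray(hf_x)          # list, tuple, range, ndarray, ...

assert isinstance(normalize_hf_x((1, 2, 3)), np.ndarray)      # tuple handled
assert isinstance(normalize_hf_x(pd.Series([1.0])), np.ndarray)
idx = pd.date_range("2022-01-01", periods=3, tz="UTC")
assert normalize_hf_x(idx) is idx                             # Index untouched
```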

pyproject.toml

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "plotly-resampler"  # Do not forget to update the __init__.py __version__ variable
-version = "0.5.0"
+version = "0.6.3"
 description = "Visualizing large time series with plotly"
 authors = ["Jonas Van Der Donckt", "Jeroen Van Der Donckt", "Emiel Deprost"]
 readme = "README.md"
```

tests/conftest.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -59,7 +59,7 @@ def cat_series() -> pd.Series:
 
 @pytest.fixture
 def bool_series() -> pd.Series:
-    bool_list = [True, False, True, True, True, True]
+    bool_list = [True, False, True, True, True, True] + [True] * 50
     return pd.Series(bool_list * (_nb_samples // len(bool_list)), dtype="bool")
```
