Merge pull request #90 from predict-idlab/numeric_object_series

jonasvdd · web-flow · commit 49317d3c4f02 · 2022-06-29T16:21:06.000+02:00
Fix numeric `hf_y` input as dtype object
diff --git a/plotly_resampler/aggregation/aggregation_interface.py b/plotly_resampler/aggregation/aggregation_interface.py
@@ -152,10 +152,6 @@ def aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
 
         self._supports_dtype(s)
 
-        # convert the bool values to uint8 (as we will display them on a y-axis)
-        if str(s.dtype) == "bool":
-            s = s.astype("uint8")
-
         if len(s) > n_out:
             # More samples that n_out -> perform data aggregation
             s = self._aggregate(s, n_out=n_out)
diff --git a/plotly_resampler/figure_resampler/figure_resampler_interface.py b/plotly_resampler/figure_resampler/figure_resampler_interface.py
@@ -457,6 +457,7 @@ def _get_figure_class(constr: type) -> type:
 
         """
         from ..registering import _get_plotly_constr  # To avoid ImportError
+
         return _get_plotly_constr(constr)
 
     @staticmethod
@@ -679,7 +680,14 @@ def _parse_get_trace_props(
             # transform it to type string as such it will be sent as categorical data
             # to the downsampling algorithm
             if hf_y.dtype == "object":
-                hf_y = hf_y.astype("str")
+                # But first, we try to parse to a numeric dtype (as this is the
+                # behavior that plotly supports)
+                # Note that a bool array of type object will remain a bool array (and
+                # will not be transformed to an array of ints (0, 1))
+                try:
+                    hf_y = pd.to_numeric(hf_y, errors="raise")
+                except:
+                    hf_y = hf_y.astype("str")
 
             # orjson encoding doesn't like to encode with uint8 & uint16 dtype
             if str(hf_y.dtype) in ["uint8", "uint16"]:
@@ -1097,10 +1105,7 @@ def _copy_hf_data(self, hf_data: dict, adjust_default_values: bool = False) -> d
 
         """
         hf_data_cp = {
-            uid: {
-                k: hf_dict[k]
-                for k in set(hf_dict.keys())
-            }
+            uid: {k: hf_dict[k] for k in set(hf_dict.keys())}
             for uid, hf_dict in hf_data.items()
         }
 
diff --git a/tests/test_figure_resampler.py b/tests/test_figure_resampler.py
@@ -871,4 +871,59 @@ def test_fr_copy_hf_data(float_series):
     assert len(fr_fig.hf_data[0]["x"]) == 10_000
     assert len(fr_fig.hf_data[0]["y"]) == 10_000
     assert len(fr_fig.hf_data[1]["x"]) == 10_000
-    assert len(fr_fig.hf_data[1]["y"]) == 10_000
+    assert len(fr_fig.hf_data[1]["y"]) == 10_000
+
+
+def test_fr_object_hf_data(float_series):
+    float_series_o = float_series.astype(object)
+
+    fig = FigureResampler()
+    fig.add_trace({"name": "s0"}, hf_y=float_series_o)
+    assert float_series_o.dtype == object
+    assert len(fig.hf_data) == 1
+    assert fig.hf_data[0]["y"].dtype == "float64"
+    assert fig.data[0]["y"].dtype == "float64"
+
+
+def test_fr_object_bool_data(bool_series):
+    # First try with the original non-object bool series
+    fig = FigureResampler()
+    fig.add_trace({"name": "s0"}, hf_y=bool_series)
+    assert len(fig.hf_data) == 1
+    assert fig.hf_data[0]["y"].dtype == "bool"
+    # plotly internally converts this to object
+    assert fig.data[0]["y"].dtype == "object"
+
+    # Now try with the object bool series
+    bool_series_o = bool_series.astype(object)
+
+    fig = FigureResampler()
+    fig.add_trace({"name": "s0"}, hf_y=bool_series_o)
+    assert bool_series_o.dtype == object
+    assert len(fig.hf_data) == 1
+    assert fig.hf_data[0]["y"].dtype == "bool"
+    # plotly internally converts this to object
+    assert fig.data[0]["y"].dtype == "object"
+
+
+def test_fr_object_binary_data():
+    binary_series = np.array([0, 1]*20)  # as this is << max_n_samples -> limit_to_view
+
+    # First try with the original non-object binary series
+    fig = FigureResampler()
+    fig.add_trace({"name": "s0"}, hf_y=binary_series, limit_to_view=True)
+    assert len(fig.hf_data) == 1
+    assert fig.hf_data[0]["y"].dtype == "int64"
+    assert fig.data[0]["y"].dtype == "int64"
+    assert np.all(fig.data[0]["y"] == binary_series)
+
+    # Now try with the object binary series
+    binary_series_o = binary_series.astype(object)
+
+    fig = FigureResampler()
+    fig.add_trace({"name": "s0"}, hf_y=binary_series_o, limit_to_view=True)
+    assert binary_series_o.dtype == object
+    assert len(fig.hf_data) == 1
+    assert fig.hf_data[0]["y"].dtype == "int64"
+    assert fig.data[0]["y"].dtype == "int64"
+    assert np.all(fig.data[0]["y"] == binary_series)
diff --git a/tests/test_figurewidget_resampler.py b/tests/test_figurewidget_resampler.py
@@ -1596,7 +1596,7 @@ def test_fwr_from_list_dict(float_series):
     assert fr_fig.data[1].uid in fr_fig._hf_data
 
     # redo the exercise with a new low-freq trace
-    base_fig.append({'y': float_series[:1000], 'name': "s_no_agg"})
+    base_fig.append({"y": float_series[:1000], "name": "s_no_agg"})
     fr_fig = FigureWidgetResampler(base_fig, default_n_shown_samples=1000)
     assert len(fr_fig.hf_data) == 2
     assert len(fr_fig.data) == 3
@@ -1626,17 +1626,17 @@ def test_fwr_list_dict_add_trace(float_series):
     assert fr_fig.data[1].uid in fr_fig._hf_data
 
     # redo the exercise with a new low-freq trace
-    fr_fig.add_trace({'y': float_series[:1000], 'name': "s_no_agg"})
+    fr_fig.add_trace({"y": float_series[:1000], "name": "s_no_agg"})
     assert len(fr_fig.hf_data) == 2
     assert len(fr_fig.data) == 3
 
     # add low-freq trace but set limit_to_view to True
-    fr_fig.add_trace({'y': float_series[:100], 'name': "s_agg"}, limit_to_view=True)
+    fr_fig.add_trace({"y": float_series[:100], "name": "s_agg"}, limit_to_view=True)
     assert len(fr_fig.hf_data) == 3
     assert len(fr_fig.data) == 4
 
     # add a low-freq trace but adjust max_n_samples
-    lf_series = {'y': float_series[:1000], 'name': "s_agg"}
+    lf_series = {"y": float_series[:1000], "name": "s_agg"}
     # plotly its default behavior raises a ValueError when a list or tuple is passed
     # to add_trace
     with pytest.raises(ValueError):
@@ -1672,18 +1672,18 @@ def test_fwr_list_dict_add_traces(float_series):
 
     # redo the exercise with a new low-freq trace
     # plotly also allows a dict or a scatter object as input
-    fr_fig.add_traces({'y': float_series[:1000], 'name': "s_no_agg"})
+    fr_fig.add_traces({"y": float_series[:1000], "name": "s_no_agg"})
     assert len(fr_fig.hf_data) == 2
     assert len(fr_fig.data) == 3
 
     # add low-freq trace but set limit_to_view to True
-    fr_fig.add_traces([{'y': float_series[:100], 'name': "s_agg"}], limit_to_views=True)
+    fr_fig.add_traces([{"y": float_series[:100], "name": "s_agg"}], limit_to_views=True)
     assert len(fr_fig.hf_data) == 3
     assert len(fr_fig.data) == 4
 
     # add a low-freq trace but adjust max_n_samples
     # note that we use tuple as input
-    fr_fig.add_traces(({'y': float_series[:1000], 'name': "s_agg"}, ), max_n_samples=999)
+    fr_fig.add_traces(({"y": float_series[:1000], "name": "s_agg"},), max_n_samples=999)
     assert len(fr_fig.hf_data) == 4
     assert len(fr_fig.data) == 5
 
@@ -1710,7 +1710,7 @@ def test_fwr_list_scatter_add_traces(float_series):
     assert fr_fig.data[1].uid in fr_fig._hf_data
 
     # redo the exercise with a new low-freq trace
-    fr_fig.add_traces([go.Scattergl({'y': float_series[:1000], 'name': "s_no_agg"})])
+    fr_fig.add_traces([go.Scattergl({"y": float_series[:1000], "name": "s_no_agg"})])
     assert len(fr_fig.hf_data) == 2
     assert len(fr_fig.data) == 3
 
@@ -1721,10 +1721,13 @@ def test_fwr_list_scatter_add_traces(float_series):
     assert len(fr_fig.data) == 4
 
     # add a low-freq trace but adjust max_n_samples
-    fr_fig.add_traces(go.Scatter({'y': float_series[:1000], 'name': "s_agg"}), max_n_samples=999)
+    fr_fig.add_traces(
+        go.Scatter({"y": float_series[:1000], "name": "s_agg"}), max_n_samples=999
+    )
     assert len(fr_fig.hf_data) == 4
     assert len(fr_fig.data) == 5
 
+
 def test_fwr_add_scatter():
     # Checks whether the add_scatter method works as expected
     # .add_scatter calls `add_traces` under the hood
@@ -1736,3 +1739,60 @@ def test_fwr_add_scatter():
     assert len(fw_orig.data[0].y) == 2_000
     assert len(fw_pr.data[0]["y"]) == 1_000
     assert np.all(fw_orig.data[0].y == fw_pr.hf_data[0]["y"])
+
+
+def test_fwr_object_hf_data(
+    float_series,
+):
+    float_series_o = float_series.astype(object)
+
+    fig = FigureWidgetResampler()
+    fig.add_trace({"name": "s0"}, hf_y=float_series_o)
+    assert float_series_o.dtype == object
+    assert len(fig.hf_data) == 1
+    assert fig.hf_data[0]["y"].dtype == "float64"
+    assert fig.data[0]["y"].dtype == "float64"
+
+
+def test_fwr_object_bool_data(bool_series):
+    # First try with the original non-object bool series
+    fig = FigureWidgetResampler()
+    fig.add_trace({"name": "s0"}, hf_y=bool_series)
+    assert len(fig.hf_data) == 1
+    assert fig.hf_data[0]["y"].dtype == "bool"
+    # plotly internally converts this to object
+    assert fig.data[0]["y"].dtype == "object"
+
+    # Now try with the object bool series
+    bool_series_o = bool_series.astype(object)
+
+    fig = FigureWidgetResampler()
+    fig.add_trace({"name": "s0"}, hf_y=bool_series_o)
+    assert bool_series_o.dtype == object
+    assert len(fig.hf_data) == 1
+    assert fig.hf_data[0]["y"].dtype == "bool"
+    # plotly internally converts this to object
+    assert fig.data[0]["y"].dtype == "object"
+
+
+def test_fwr_object_binary_data():
+    binary_series = np.array([0, 1]*20)  # as this is << max_n_samples -> limit_to_view
+
+    # First try with the original non-object binary series
+    fig = FigureWidgetResampler()
+    fig.add_trace({"name": "s0"}, hf_y=binary_series, limit_to_view=True)
+    assert len(fig.hf_data) == 1
+    assert fig.hf_data[0]["y"].dtype == "int64"
+    assert fig.data[0]["y"].dtype == "int64"
+    assert np.all(fig.data[0]["y"] == binary_series)
+
+    # Now try with the object binary series
+    binary_series_o = binary_series.astype(object)
+
+    fig = FigureWidgetResampler()
+    fig.add_trace({"name": "s0"}, hf_y=binary_series_o, limit_to_view=True)
+    assert binary_series_o.dtype == object
+    assert len(fig.hf_data) == 1
+    assert fig.hf_data[0]["y"].dtype == "int64"
+    assert fig.data[0]["y"].dtype == "int64"
+    assert np.all(fig.data[0]["y"] == binary_series)