Skip to content

Commit 49317d3

Browse files
authored
Merge pull request #90 from predict-idlab/numeric_object_series
Fix numeric `hf_y` input as dtype object
2 parents cc951c1 + 82ed5e9 commit 49317d3

File tree

4 files changed

+135
-19
lines changed

4 files changed

+135
-19
lines changed

plotly_resampler/aggregation/aggregation_interface.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,6 @@ def aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
152152

153153
self._supports_dtype(s)
154154

155-
# convert the bool values to uint8 (as we will display them on a y-axis)
156-
if str(s.dtype) == "bool":
157-
s = s.astype("uint8")
158-
159155
if len(s) > n_out:
160156
# More samples than n_out -> perform data aggregation
161157
s = self._aggregate(s, n_out=n_out)

plotly_resampler/figure_resampler/figure_resampler_interface.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,7 @@ def _get_figure_class(constr: type) -> type:
457457
458458
"""
459459
from ..registering import _get_plotly_constr # To avoid ImportError
460+
460461
return _get_plotly_constr(constr)
461462

462463
@staticmethod
@@ -679,7 +680,14 @@ def _parse_get_trace_props(
679680
# transform it to type string as such it will be sent as categorical data
680681
# to the downsampling algorithm
681682
if hf_y.dtype == "object":
682-
hf_y = hf_y.astype("str")
683+
# But first, we try to parse to a numeric dtype (as this is the
684+
# behavior that plotly supports)
685+
# Note that a bool array of type object will remain a bool array (and
686+
# will not be transformed to an array of ints (0, 1))
687+
try:
688+
hf_y = pd.to_numeric(hf_y, errors="raise")
689+
except:
690+
hf_y = hf_y.astype("str")
683691

684692
# orjson encoding doesn't like to encode with uint8 & uint16 dtype
685693
if str(hf_y.dtype) in ["uint8", "uint16"]:
@@ -1097,10 +1105,7 @@ def _copy_hf_data(self, hf_data: dict, adjust_default_values: bool = False) -> d
10971105
10981106
"""
10991107
hf_data_cp = {
1100-
uid: {
1101-
k: hf_dict[k]
1102-
for k in set(hf_dict.keys())
1103-
}
1108+
uid: {k: hf_dict[k] for k in set(hf_dict.keys())}
11041109
for uid, hf_dict in hf_data.items()
11051110
}
11061111

tests/test_figure_resampler.py

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -871,4 +871,59 @@ def test_fr_copy_hf_data(float_series):
871871
assert len(fr_fig.hf_data[0]["x"]) == 10_000
872872
assert len(fr_fig.hf_data[0]["y"]) == 10_000
873873
assert len(fr_fig.hf_data[1]["x"]) == 10_000
874-
assert len(fr_fig.hf_data[1]["y"]) == 10_000
874+
assert len(fr_fig.hf_data[1]["y"]) == 10_000
875+
876+
877+
def test_fr_object_hf_data(float_series):
878+
float_series_o = float_series.astype(object)
879+
880+
fig = FigureResampler()
881+
fig.add_trace({"name": "s0"}, hf_y=float_series_o)
882+
assert float_series_o.dtype == object
883+
assert len(fig.hf_data) == 1
884+
assert fig.hf_data[0]["y"].dtype == "float64"
885+
assert fig.data[0]["y"].dtype == "float64"
886+
887+
888+
def test_fr_object_bool_data(bool_series):
889+
# First try with the original non-object bool series
890+
fig = FigureResampler()
891+
fig.add_trace({"name": "s0"}, hf_y=bool_series)
892+
assert len(fig.hf_data) == 1
893+
assert fig.hf_data[0]["y"].dtype == "bool"
894+
# plotly internally converts this to object
895+
assert fig.data[0]["y"].dtype == "object"
896+
897+
# Now try with the object bool series
898+
bool_series_o = bool_series.astype(object)
899+
900+
fig = FigureResampler()
901+
fig.add_trace({"name": "s0"}, hf_y=bool_series_o)
902+
assert bool_series_o.dtype == object
903+
assert len(fig.hf_data) == 1
904+
assert fig.hf_data[0]["y"].dtype == "bool"
905+
# plotly internally converts this to object
906+
assert fig.data[0]["y"].dtype == "object"
907+
908+
909+
def test_fr_object_binary_data():
910+
binary_series = np.array([0, 1]*20) # as this is << max_n_samples -> limit_to_view
911+
912+
# First try with the original non-object binary series
913+
fig = FigureResampler()
914+
fig.add_trace({"name": "s0"}, hf_y=binary_series, limit_to_view=True)
915+
assert len(fig.hf_data) == 1
916+
assert fig.hf_data[0]["y"].dtype == "int64"
917+
assert fig.data[0]["y"].dtype == "int64"
918+
assert np.all(fig.data[0]["y"] == binary_series)
919+
920+
# Now try with the object binary series
921+
binary_series_o = binary_series.astype(object)
922+
923+
fig = FigureResampler()
924+
fig.add_trace({"name": "s0"}, hf_y=binary_series_o, limit_to_view=True)
925+
assert binary_series_o.dtype == object
926+
assert len(fig.hf_data) == 1
927+
assert fig.hf_data[0]["y"].dtype == "int64"
928+
assert fig.data[0]["y"].dtype == "int64"
929+
assert np.all(fig.data[0]["y"] == binary_series)

tests/test_figurewidget_resampler.py

Lines changed: 69 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1596,7 +1596,7 @@ def test_fwr_from_list_dict(float_series):
15961596
assert fr_fig.data[1].uid in fr_fig._hf_data
15971597

15981598
# redo the exercise with a new low-freq trace
1599-
base_fig.append({'y': float_series[:1000], 'name': "s_no_agg"})
1599+
base_fig.append({"y": float_series[:1000], "name": "s_no_agg"})
16001600
fr_fig = FigureWidgetResampler(base_fig, default_n_shown_samples=1000)
16011601
assert len(fr_fig.hf_data) == 2
16021602
assert len(fr_fig.data) == 3
@@ -1626,17 +1626,17 @@ def test_fwr_list_dict_add_trace(float_series):
16261626
assert fr_fig.data[1].uid in fr_fig._hf_data
16271627

16281628
# redo the exercise with a new low-freq trace
1629-
fr_fig.add_trace({'y': float_series[:1000], 'name': "s_no_agg"})
1629+
fr_fig.add_trace({"y": float_series[:1000], "name": "s_no_agg"})
16301630
assert len(fr_fig.hf_data) == 2
16311631
assert len(fr_fig.data) == 3
16321632

16331633
# add low-freq trace but set limit_to_view to True
1634-
fr_fig.add_trace({'y': float_series[:100], 'name': "s_agg"}, limit_to_view=True)
1634+
fr_fig.add_trace({"y": float_series[:100], "name": "s_agg"}, limit_to_view=True)
16351635
assert len(fr_fig.hf_data) == 3
16361636
assert len(fr_fig.data) == 4
16371637

16381638
# add a low-freq trace but adjust max_n_samples
1639-
lf_series = {'y': float_series[:1000], 'name': "s_agg"}
1639+
lf_series = {"y": float_series[:1000], "name": "s_agg"}
16401640
# plotly its default behavior raises a ValueError when a list or tuple is passed
16411641
# to add_trace
16421642
with pytest.raises(ValueError):
@@ -1672,18 +1672,18 @@ def test_fwr_list_dict_add_traces(float_series):
16721672

16731673
# redo the exercise with a new low-freq trace
16741674
# plotly also allows a dict or a scatter object as input
1675-
fr_fig.add_traces({'y': float_series[:1000], 'name': "s_no_agg"})
1675+
fr_fig.add_traces({"y": float_series[:1000], "name": "s_no_agg"})
16761676
assert len(fr_fig.hf_data) == 2
16771677
assert len(fr_fig.data) == 3
16781678

16791679
# add low-freq trace but set limit_to_view to True
1680-
fr_fig.add_traces([{'y': float_series[:100], 'name': "s_agg"}], limit_to_views=True)
1680+
fr_fig.add_traces([{"y": float_series[:100], "name": "s_agg"}], limit_to_views=True)
16811681
assert len(fr_fig.hf_data) == 3
16821682
assert len(fr_fig.data) == 4
16831683

16841684
# add a low-freq trace but adjust max_n_samples
16851685
# note that we use tuple as input
1686-
fr_fig.add_traces(({'y': float_series[:1000], 'name': "s_agg"}, ), max_n_samples=999)
1686+
fr_fig.add_traces(({"y": float_series[:1000], "name": "s_agg"},), max_n_samples=999)
16871687
assert len(fr_fig.hf_data) == 4
16881688
assert len(fr_fig.data) == 5
16891689

@@ -1710,7 +1710,7 @@ def test_fwr_list_scatter_add_traces(float_series):
17101710
assert fr_fig.data[1].uid in fr_fig._hf_data
17111711

17121712
# redo the exercise with a new low-freq trace
1713-
fr_fig.add_traces([go.Scattergl({'y': float_series[:1000], 'name': "s_no_agg"})])
1713+
fr_fig.add_traces([go.Scattergl({"y": float_series[:1000], "name": "s_no_agg"})])
17141714
assert len(fr_fig.hf_data) == 2
17151715
assert len(fr_fig.data) == 3
17161716

@@ -1721,10 +1721,13 @@ def test_fwr_list_scatter_add_traces(float_series):
17211721
assert len(fr_fig.data) == 4
17221722

17231723
# add a low-freq trace but adjust max_n_samples
1724-
fr_fig.add_traces(go.Scatter({'y': float_series[:1000], 'name': "s_agg"}), max_n_samples=999)
1724+
fr_fig.add_traces(
1725+
go.Scatter({"y": float_series[:1000], "name": "s_agg"}), max_n_samples=999
1726+
)
17251727
assert len(fr_fig.hf_data) == 4
17261728
assert len(fr_fig.data) == 5
17271729

1730+
17281731
def test_fwr_add_scatter():
17291732
# Checks whether the add_scatter method works as expected
17301733
# .add_scatter calls `add_traces` under the hood
@@ -1736,3 +1739,60 @@ def test_fwr_add_scatter():
17361739
assert len(fw_orig.data[0].y) == 2_000
17371740
assert len(fw_pr.data[0]["y"]) == 1_000
17381741
assert np.all(fw_orig.data[0].y == fw_pr.hf_data[0]["y"])
1742+
1743+
1744+
def test_fwr_object_hf_data(
1745+
float_series,
1746+
):
1747+
float_series_o = float_series.astype(object)
1748+
1749+
fig = FigureWidgetResampler()
1750+
fig.add_trace({"name": "s0"}, hf_y=float_series_o)
1751+
assert float_series_o.dtype == object
1752+
assert len(fig.hf_data) == 1
1753+
assert fig.hf_data[0]["y"].dtype == "float64"
1754+
assert fig.data[0]["y"].dtype == "float64"
1755+
1756+
1757+
def test_fwr_object_bool_data(bool_series):
1758+
# First try with the original non-object bool series
1759+
fig = FigureWidgetResampler()
1760+
fig.add_trace({"name": "s0"}, hf_y=bool_series)
1761+
assert len(fig.hf_data) == 1
1762+
assert fig.hf_data[0]["y"].dtype == "bool"
1763+
# plotly internally converts this to object
1764+
assert fig.data[0]["y"].dtype == "object"
1765+
1766+
# Now try with the object bool series
1767+
bool_series_o = bool_series.astype(object)
1768+
1769+
fig = FigureWidgetResampler()
1770+
fig.add_trace({"name": "s0"}, hf_y=bool_series_o)
1771+
assert bool_series_o.dtype == object
1772+
assert len(fig.hf_data) == 1
1773+
assert fig.hf_data[0]["y"].dtype == "bool"
1774+
# plotly internally converts this to object
1775+
assert fig.data[0]["y"].dtype == "object"
1776+
1777+
1778+
def test_fwr_object_binary_data():
1779+
binary_series = np.array([0, 1]*20) # as this is << max_n_samples -> limit_to_view
1780+
1781+
# First try with the original non-object binary series
1782+
fig = FigureWidgetResampler()
1783+
fig.add_trace({"name": "s0"}, hf_y=binary_series, limit_to_view=True)
1784+
assert len(fig.hf_data) == 1
1785+
assert fig.hf_data[0]["y"].dtype == "int64"
1786+
assert fig.data[0]["y"].dtype == "int64"
1787+
assert np.all(fig.data[0]["y"] == binary_series)
1788+
1789+
# Now try with the object binary series
1790+
binary_series_o = binary_series.astype(object)
1791+
1792+
fig = FigureWidgetResampler()
1793+
fig.add_trace({"name": "s0"}, hf_y=binary_series_o, limit_to_view=True)
1794+
assert binary_series_o.dtype == object
1795+
assert len(fig.hf_data) == 1
1796+
assert fig.hf_data[0]["y"].dtype == "int64"
1797+
assert fig.data[0]["y"].dtype == "int64"
1798+
assert np.all(fig.data[0]["y"] == binary_series)

0 commit comments

Comments
 (0)