Merge pull request #66 from predict-idlab/rounding_error

jonasvdd · web-flow · commit 0fda64cbfdeb · 2022-05-19T19:27:42.000+02:00
🔧 hotfix for rounding error
diff --git a/plotly_resampler/__init__.py b/plotly_resampler/__init__.py
@@ -11,7 +11,7 @@
 
 __docformat__ = "numpy"
 __author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost"
-__version__ = "0.6.4.1"
+__version__ = "0.6.4.2"
 
 __all__ = [
     "__version__",
diff --git a/plotly_resampler/aggregation/aggregators.py b/plotly_resampler/aggregation/aggregators.py
@@ -81,9 +81,22 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
         if s_i.dtype.type == np.datetime64:
             # lttbc does not support this datatype -> convert to int
             # (where the time is represented in ns)
-            s_i = s_i.astype(int)
-            idx, data = lttbc.downsample(s_i, s_v, n_out)
-            idx = pd.to_datetime(idx, unit="ns", utc=True).tz_convert(s.index.tz)
+            # REMARK:
+            #   -> additional logic is needed to mitigate rounding errors
+            #   First, the start offset is subtracted, after which the input series
+            #   is cast to np.float64 before it is passed to lttbc.downsample
+
+            # NOTE -> Rounding errors can still persist, but this approach is already
+            #         significantly less prone to them than the previous implementation.
+            s_i0 = s_i[0].astype(np.int64)
+            idx, data = lttbc.downsample(
+                (s_i.astype(np.int64) - s_i0).astype(np.float64), s_v, n_out
+            )
+
+            # add the start-offset and convert back to datetime
+            idx = pd.to_datetime(
+                idx.astype(np.int64) + s_i0, unit="ns", utc=True
+            ).tz_convert(s.index.tz)
         else:
             idx, data = lttbc.downsample(s_i, s_v, n_out)
             idx = idx.astype(s_i.dtype)
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "plotly-resampler"  # Do not forget to update the __init__.py __version__ variable
-version = "0.6.4.1"
+version = "0.6.4.2"
 description = "Visualizing large time series with plotly"
 authors = ["Jonas Van Der Donckt", "Jeroen Van Der Donckt", "Emiel Deprost"]
 readme = "README.md"
diff --git a/tests/test_figurewidget_resampler.py b/tests/test_figurewidget_resampler.py
@@ -3,13 +3,15 @@
 __author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost"
 
 
-import pytest
+from copy import copy
+from datetime import datetime
+
 import numpy as np
 import pandas as pd
-from copy import copy
 import plotly.graph_objects as go
+import pytest
 from plotly.subplots import make_subplots
-from plotly_resampler import FigureWidgetResampler, EfficientLTTB, EveryNthPoint
+from plotly_resampler import EfficientLTTB, EveryNthPoint, FigureWidgetResampler
 
 
 def test_add_trace_kwarg_space(float_series, bool_series, cat_series):
@@ -1375,3 +1377,157 @@ def test_fwr_adjust_series_text_input():
 
     # text === -hovertext -> so the sum should their length
     assert (text == -hovertext).sum() == 1000
+
+
+def test_fwr_time_based_data_ns():
+    n = 100_000
+    fig = FigureWidgetResampler(
+        default_n_shown_samples=1000, verbose=True, default_downsampler=EfficientLTTB()
+    )
+
+    for i in range(3):
+        s = pd.Series(
+            index=pd.date_range(
+                datetime.now(), freq=f"{np.random.randint(5,100_000)}ns", periods=n
+            ),
+            data=np.arange(n),
+        )
+
+        fig.add_trace(
+            go.Scatter(name="hf_text"),
+            hf_x=s.index,
+            hf_y=s,
+            hf_text=s.astype(str),
+            hf_hovertext=(-s).astype(str),
+        )
+
+        x = fig.data[i]["x"]
+        y = fig.data[i]["y"]
+
+        assert len(x) == 1000
+        assert len(y) == 1000
+
+        text = fig.data[i]["text"].astype(int)
+        hovertext = fig.data[i]["hovertext"].astype(int)
+
+        assert len(hovertext) == 1000
+        assert len(text) == 1000
+
+        # text == -hovertext for every sample -> so the sum should equal their length
+        assert (text == -hovertext).sum() == 1000
+
+
+def test_fwr_time_based_data_us():
+    n = 100_000
+    fig = FigureWidgetResampler(
+        default_n_shown_samples=1000, verbose=True, default_downsampler=EfficientLTTB()
+    )
+
+    for i in range(3):
+        s = pd.Series(
+            index=pd.date_range(
+                datetime.now(), freq=f"{np.random.randint(5,100_000)}us", periods=n
+            ),
+            data=np.arange(n),
+        )
+
+        fig.add_trace(
+            go.Scatter(name="hf_text"),
+            hf_x=s.index,
+            hf_y=s,
+            hf_text=s.astype(str),
+            hf_hovertext=(-s).astype(str),
+        )
+
+        x = fig.data[i]["x"]
+        y = fig.data[i]["y"]
+
+        assert len(x) == 1000
+        assert len(y) == 1000
+
+        text = fig.data[i]["text"].astype(int)
+        hovertext = fig.data[i]["hovertext"].astype(int)
+
+        assert len(hovertext) == 1000
+        assert len(text) == 1000
+
+        # text == -hovertext for every sample -> so the sum should equal their length
+        assert (text == -hovertext).sum() == 1000
+
+
+def test_fwr_time_based_data_ms():
+    n = 100_000
+    fig = FigureWidgetResampler(
+        default_n_shown_samples=1000, verbose=True, default_downsampler=EfficientLTTB()
+    )
+
+    for i in range(3):
+        s = pd.Series(
+            index=pd.date_range(
+                datetime.now(), freq=f"{np.random.randint(5,10_000)}ms", periods=n
+            ),
+            data=np.arange(n),
+        )
+
+        fig.add_trace(
+            go.Scatter(name="hf_text"),
+            hf_x=s.index,
+            hf_y=s,
+            hf_text=s.astype(str),
+            hf_hovertext=(-s).astype(str),
+        )
+
+        x = fig.data[i]["x"]
+        y = fig.data[i]["y"]
+
+        assert len(x) == 1000
+        assert len(y) == 1000
+
+        text = fig.data[i]["text"].astype(int)
+        hovertext = fig.data[i]["hovertext"].astype(int)
+
+        assert len(hovertext) == 1000
+        assert len(text) == 1000
+
+        # text == -hovertext for every sample -> so the sum should equal their length
+        assert (text == -hovertext).sum() == 1000
+
+
+def test_fwr_time_based_data_s():
+    n = 100_000
+    fig = FigureWidgetResampler(
+        default_n_shown_samples=1000, verbose=True, default_downsampler=EfficientLTTB()
+    )
+
+    for i in range(3):
+        s = pd.Series(
+            index=pd.date_range(
+                datetime.now(),
+                freq=pd.Timedelta(f"{round(np.abs(np.random.randn()) * 1000, 4)}s"),
+                periods=n,
+            ),
+            data=np.arange(n),
+        )
+
+        fig.add_trace(
+            go.Scatter(name="hf_text"),
+            hf_x=s.index,
+            hf_y=s,
+            hf_text=s.astype(str),
+            hf_hovertext=(-s).astype(str),
+        )
+
+        x = fig.data[i]["x"]
+        y = fig.data[i]["y"]
+
+        assert len(x) == 1000
+        assert len(y) == 1000
+
+        text = fig.data[i]["text"].astype(int)
+        hovertext = fig.data[i]["hovertext"].astype(int)
+
+        assert len(hovertext) == 1000
+        assert len(text) == 1000
+
+        # text == -hovertext for every sample -> so the sum should equal their length
+        assert (text == -hovertext).sum() == 1000