Skip to content

Commit 0fda64c

Browse files
authored
Merge pull request #66 from predict-idlab/rounding_error
🔧 hotfix for rounding error
2 parents df9f98d + 0683d07 commit 0fda64c

File tree

4 files changed

+177
-8
lines changed

4 files changed

+177
-8
lines changed

plotly_resampler/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
__docformat__ = "numpy"
1313
__author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost"
14-
__version__ = "0.6.4.1"
14+
__version__ = "0.6.4.2"
1515

1616
__all__ = [
1717
"__version__",

plotly_resampler/aggregation/aggregators.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,22 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
8181
if s_i.dtype.type == np.datetime64:
8282
# lttbc does not support this datatype -> convert to int
8383
# (where the time is represented in ns)
84-
s_i = s_i.astype(int)
85-
idx, data = lttbc.downsample(s_i, s_v, n_out)
86-
idx = pd.to_datetime(idx, unit="ns", utc=True).tz_convert(s.index.tz)
84+
# REMARK:
85+
# -> additional logic is needed to mitigate rounding errors
86+
# First, the start offset is subtracted, after which the input series
87+
# is set in the already requested format, i.e. np.float64
88+
89+
# NOTE -> Rounding errors can still persist, but this approach is already
90+
# significantly less prone to it than the previous implementation.
91+
s_i0 = s_i[0].astype(np.int64)
92+
idx, data = lttbc.downsample(
93+
(s_i.astype(np.int64) - s_i0).astype(np.float64), s_v, n_out
94+
)
95+
96+
# add the start-offset and convert back to datetime
97+
idx = pd.to_datetime(
98+
idx.astype(np.int64) + s_i0, unit="ns", utc=True
99+
).tz_convert(s.index.tz)
87100
else:
88101
idx, data = lttbc.downsample(s_i, s_v, n_out)
89102
idx = idx.astype(s_i.dtype)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "plotly-resampler" # Do not forget to update the __init__.py __version__ variable
3-
version = "0.6.4.1"
3+
version = "0.6.4.2"
44
description = "Visualizing large time series with plotly"
55
authors = ["Jonas Van Der Donckt", "Jeroen Van Der Donckt", "Emiel Deprost"]
66
readme = "README.md"

tests/test_figurewidget_resampler.py

Lines changed: 159 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@
33
__author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost"
44

55

6-
import pytest
6+
from copy import copy
7+
from datetime import datetime
8+
79
import numpy as np
810
import pandas as pd
9-
from copy import copy
1011
import plotly.graph_objects as go
12+
import pytest
1113
from plotly.subplots import make_subplots
12-
from plotly_resampler import FigureWidgetResampler, EfficientLTTB, EveryNthPoint
14+
from plotly_resampler import EfficientLTTB, EveryNthPoint, FigureWidgetResampler
1315

1416

1517
def test_add_trace_kwarg_space(float_series, bool_series, cat_series):
@@ -1375,3 +1377,157 @@ def test_fwr_adjust_series_text_input():
13751377

13761378
# text === -hovertext -> so the sum should equal their length
13771379
assert (text == -hovertext).sum() == 1000
1380+
1381+
1382+
def test_fwr_time_based_data_ns():
1383+
n = 100_000
1384+
fig = FigureWidgetResampler(
1385+
default_n_shown_samples=1000, verbose=True, default_downsampler=EfficientLTTB()
1386+
)
1387+
1388+
for i in range(3):
1389+
s = pd.Series(
1390+
index=pd.date_range(
1391+
datetime.now(), freq=f"{np.random.randint(5,100_000)}ns", periods=n
1392+
),
1393+
data=np.arange(n),
1394+
)
1395+
1396+
fig.add_trace(
1397+
go.Scatter(name="hf_text"),
1398+
hf_x=s.index,
1399+
hf_y=s,
1400+
hf_text=s.astype(str),
1401+
hf_hovertext=(-s).astype(str),
1402+
)
1403+
1404+
x = fig.data[i]["x"]
1405+
y = fig.data[i]["y"]
1406+
1407+
assert len(x) == 1000
1408+
assert len(y) == 1000
1409+
1410+
text = fig.data[i]["text"].astype(int)
1411+
hovertext = fig.data[i]["hovertext"].astype(int)
1412+
1413+
assert len(hovertext) == 1000
1414+
assert len(text) == 1000
1415+
1416+
# text === -hovertext -> so the sum should their length
1417+
assert (text == -hovertext).sum() == 1000
1418+
1419+
1420+
def test_fwr_time_based_data_us():
1421+
n = 100_000
1422+
fig = FigureWidgetResampler(
1423+
default_n_shown_samples=1000, verbose=True, default_downsampler=EfficientLTTB()
1424+
)
1425+
1426+
for i in range(3):
1427+
s = pd.Series(
1428+
index=pd.date_range(
1429+
datetime.now(), freq=f"{np.random.randint(5,100_000)}us", periods=n
1430+
),
1431+
data=np.arange(n),
1432+
)
1433+
1434+
fig.add_trace(
1435+
go.Scatter(name="hf_text"),
1436+
hf_x=s.index,
1437+
hf_y=s,
1438+
hf_text=s.astype(str),
1439+
hf_hovertext=(-s).astype(str),
1440+
)
1441+
1442+
x = fig.data[i]["x"]
1443+
y = fig.data[i]["y"]
1444+
1445+
assert len(x) == 1000
1446+
assert len(y) == 1000
1447+
1448+
text = fig.data[i]["text"].astype(int)
1449+
hovertext = fig.data[i]["hovertext"].astype(int)
1450+
1451+
assert len(hovertext) == 1000
1452+
assert len(text) == 1000
1453+
1454+
# text === -hovertext -> so the sum should their length
1455+
assert (text == -hovertext).sum() == 1000
1456+
1457+
1458+
def test_fwr_time_based_data_ms():
1459+
n = 100_000
1460+
fig = FigureWidgetResampler(
1461+
default_n_shown_samples=1000, verbose=True, default_downsampler=EfficientLTTB()
1462+
)
1463+
1464+
for i in range(3):
1465+
s = pd.Series(
1466+
index=pd.date_range(
1467+
datetime.now(), freq=f"{np.random.randint(5,10_000)}ms", periods=n
1468+
),
1469+
data=np.arange(n),
1470+
)
1471+
1472+
fig.add_trace(
1473+
go.Scatter(name="hf_text"),
1474+
hf_x=s.index,
1475+
hf_y=s,
1476+
hf_text=s.astype(str),
1477+
hf_hovertext=(-s).astype(str),
1478+
)
1479+
1480+
x = fig.data[i]["x"]
1481+
y = fig.data[i]["y"]
1482+
1483+
assert len(x) == 1000
1484+
assert len(y) == 1000
1485+
1486+
text = fig.data[i]["text"].astype(int)
1487+
hovertext = fig.data[i]["hovertext"].astype(int)
1488+
1489+
assert len(hovertext) == 1000
1490+
assert len(text) == 1000
1491+
1492+
# text === -hovertext -> so the sum should their length
1493+
assert (text == -hovertext).sum() == 1000
1494+
1495+
1496+
def test_fwr_time_based_data_s():
1497+
n = 100_000
1498+
fig = FigureWidgetResampler(
1499+
default_n_shown_samples=1000, verbose=True, default_downsampler=EfficientLTTB()
1500+
)
1501+
1502+
for i in range(3):
1503+
s = pd.Series(
1504+
index=pd.date_range(
1505+
datetime.now(),
1506+
freq=pd.Timedelta(f"{round(np.abs(np.random.randn()) * 1000, 4)}s"),
1507+
periods=n,
1508+
),
1509+
data=np.arange(n),
1510+
)
1511+
1512+
fig.add_trace(
1513+
go.Scatter(name="hf_text"),
1514+
hf_x=s.index,
1515+
hf_y=s,
1516+
hf_text=s.astype(str),
1517+
hf_hovertext=(-s).astype(str),
1518+
)
1519+
1520+
x = fig.data[i]["x"]
1521+
y = fig.data[i]["y"]
1522+
1523+
assert len(x) == 1000
1524+
assert len(y) == 1000
1525+
1526+
text = fig.data[i]["text"].astype(int)
1527+
hovertext = fig.data[i]["hovertext"].astype(int)
1528+
1529+
assert len(hovertext) == 1000
1530+
assert len(text) == 1000
1531+
1532+
# text === -hovertext -> so the sum should their length
1533+
assert (text == -hovertext).sum() == 1000

0 commit comments

Comments
 (0)