
Commit 5135b60

πŸ› Small bugfixes
2 parents 3d8f6af + e9a5c1b commit 5135b60

File tree: 6 files changed (+61, -41 lines changed)

plotly_resampler/__init__.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -11,7 +11,7 @@
 
 __docformat__ = "numpy"
 __author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost"
-__version__ = "0.5.0"
+__version__ = "0.6.3"
 
 __all__ = [
     "__version__",
```

plotly_resampler/aggregation/aggregation_interface.py

Lines changed: 12 additions & 9 deletions

```diff
@@ -54,7 +54,9 @@ def _supports_dtype(self, s: pd.Series):
     @staticmethod
     def _calc_med_diff(s: pd.Series) -> Tuple[float, np.ndarray]:
         # ----- divide and conquer heuristic to calculate the median diff ------
-        s_idx_diff = np.diff(s.index.values)  # remark: s_idx_diff.shape === len(s) -1
+        # remark: thanks to the prepend -> s_idx_diff.shape === len(s)
+        siv = s.index.values
+        s_idx_diff = np.diff(s.index.values, prepend=siv[0])
 
         # To do so - use a quantile-based (median) approach where we reshape the data
         # into `n_blocks` blocks and calculate the min
@@ -66,9 +68,11 @@ def _calc_med_diff(s: pd.Series) -> Tuple[float, np.ndarray]:
             sid_v: np.ndarray = s_idx_diff[: blck_size * n_blcks].reshape(n_blcks, -1)
 
             # calculate the min and max and calculate the median on that
-            med_diff = np.median(np.concatenate((sid_v.min(axis=0), sid_v.max(axis=0))))
+            med_diff = np.quantile(
+                np.concatenate((sid_v.min(axis=0), sid_v.max(axis=0))), q=0.55
+            )
         else:
-            med_diff = np.median(s_idx_diff)
+            med_diff = np.quantile(s_idx_diff, q=0.55)
 
         return med_diff, s_idx_diff
```
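Two things change here: `prepend=siv[0]` makes the diff array exactly `len(s)` long (its first entry becomes zero), so boolean masks computed from it align one-to-one with `s`; and the plain median is replaced by the 0.55-quantile, which nudges the spacing estimate slightly upward and so makes the `3 *` gap threshold a bit more tolerant of sampling jitter. A minimal, self-contained sketch of the heuristic; the data and the `n_blcks` value are invented for illustration:

```python
import numpy as np
import pandas as pd

# ~1 Hz samples with one large gap in the middle (toy data).
idx = np.concatenate([np.arange(0, 500), np.arange(10_000, 10_500)]).astype(float)
s = pd.Series(np.random.randn(idx.size), index=idx)

siv = s.index.values
# prepend the first index value -> the diff array has the same length as s
# (its first entry is 0), so masks computed on it align with s directly.
s_idx_diff = np.diff(siv, prepend=siv[0])

n_blcks = 128  # invented block count; the library picks its own value
if s_idx_diff.size > n_blcks:
    blck_size = s_idx_diff.size // n_blcks
    # Divide and conquer: reshape into blocks, keep the per-column min & max
    # across blocks, then take a quantile of those extremes instead of
    # sorting the full diff array.
    sid_v = s_idx_diff[: blck_size * n_blcks].reshape(n_blcks, -1)
    med_diff = np.quantile(
        np.concatenate((sid_v.min(axis=0), sid_v.max(axis=0))), q=0.55
    )
else:
    med_diff = np.quantile(s_idx_diff, q=0.55)

print(med_diff)  # ~1.0: the dominant sample spacing, despite the gap
```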

```diff
@@ -77,17 +81,17 @@ def _insert_gap_none(self, s: pd.Series) -> pd.Series:
         med_diff, s_idx_diff = self._calc_med_diff(s)
         # add None data-points in-between the gaps
         if med_diff is not None:
-            df_gap_idx = s.index.values[1:][s_idx_diff > 3 * med_diff]
+            df_gap_idx = s.index.values[s_idx_diff > 3 * med_diff]
             if len(df_gap_idx):
                 df_res_gap = pd.Series(
                     index=df_gap_idx, data=None, name=s.name, copy=False
                 )
 
                 if isinstance(df_res_gap.index, pd.DatetimeIndex):
-                    # Due to the s.index`.values` cast, df_res_gap has lost
+                    # Due to the s.index`.values` cast, df_res_gap has lost
                     # time-information, so now we restore it
-                    df_res_gap.index = (
-                        df_res_gap.index.tz_localize('UTC').tz_convert(s.index.tz)
+                    df_res_gap.index = df_res_gap.index.tz_localize("UTC").tz_convert(
+                        s.index.tz
                     )
 
             # Note:
@@ -104,8 +108,7 @@ def _replace_gap_end_none(self, s: pd.Series) -> pd.Series:
         med_diff, s_idx_diff = self._calc_med_diff(s)
         if med_diff is not None:
             # Replace data-points with None where the gaps occur
-            s.iloc[1:].loc[s_idx_diff > 3 * med_diff] = None
-
+            s.loc[s_idx_diff > 3 * med_diff] = None
         return s
 
     def aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
```
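Because the mask now has the same length as `s`, both call sites drop their off-by-one `[1:]` slicing. The old `s.iloc[1:].loc[...] = None` was also a chained-indexing assignment, which pandas may apply to a temporary copy rather than to `s` itself; the single-step `s.loc[...] = None` sidesteps that. A toy illustration with invented data:

```python
import numpy as np
import pandas as pd

s = pd.Series(
    [1.0, 2.0, 3.0, 4.0],
    index=np.array([0.0, 1.0, 2.0, 50.0]),  # large jump between 2.0 and 50.0
)

siv = s.index.values
s_idx_diff = np.diff(siv, prepend=siv[0])    # len(s_idx_diff) == len(s)
med_diff = np.quantile(s_idx_diff, q=0.55)   # ~1.0 here

# The mask aligns 1:1 with s; the sample *after* the gap is flagged.
gap_mask = s_idx_diff > 3 * med_diff
print(s.index.values[gap_mask])              # [50.] -> _insert_gap_none target

# _replace_gap_end_none equivalent: blank out the first sample after a gap.
s.loc[gap_mask] = None
```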

plotly_resampler/aggregation/aggregators.py

Lines changed: 40 additions & 18 deletions

```diff
@@ -44,7 +44,10 @@ class LTTB(AbstractSeriesAggregator):
 
     """
 
-    def __init__(self, interleave_gaps: bool = True, ):
+    def __init__(
+        self,
+        interleave_gaps: bool = True,
+    ):
         """
         Parameters
         ----------
@@ -56,7 +59,7 @@ def __init__(self, interleave_gaps: bool = True, ):
         super().__init__(
             interleave_gaps,
             dtype_regex_list=[rf"{dtype}\d*" for dtype in ["float", "int", "uint"]]
-            + ["category", "bool"],
+            + ["category", "bool"],
         )
 
     def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
@@ -66,11 +69,11 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
         s_i = s.index.values
 
         if s_i.dtype.type == np.datetime64:
-            # lttbc does not support this datatype -> convert to int
+            # lttbc does not support this datatype -> convert to int
             # (where the time is represented in ns)
             s_i = s_i.astype(int)
             idx, data = lttbc.downsample(s_i, s_v, n_out)
-            idx = pd.to_datetime(idx, unit='ns', utc=True).tz_convert(s.index.tz)
+            idx = pd.to_datetime(idx, unit="ns", utc=True).tz_convert(s.index.tz)
         else:
             idx, data = lttbc.downsample(s_i, s_v, n_out)
             idx = idx.astype(s_i.dtype)
```
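`lttbc.downsample` operates on plain numeric arrays, so a `datetime64` axis is passed through as integer nanoseconds; the `utc=True` plus `tz_convert(s.index.tz)` pair then restores the timezone that the `.values` cast drops. The round trip in isolation (no lttbc call is needed to see it; the explicit `np.int64` is ours, the hunk uses plain `int`):

```python
import numpy as np
import pandas as pd

rng = pd.date_range("2022-01-01", periods=5, freq="1s", tz="Europe/Brussels")
s = pd.Series(np.arange(5.0), index=rng)

s_i = s.index.values                 # datetime64[ns]; tz information is gone
assert s_i.dtype.type == np.datetime64

as_int = s_i.astype(np.int64)        # nanoseconds since the epoch (UTC)
# ... this int axis is what would be handed to lttbc.downsample(...)

# Restore: interpret the ints as UTC nanoseconds, then convert back.
restored = pd.to_datetime(as_int, unit="ns", utc=True).tz_convert(s.index.tz)
assert (restored == s.index).all()
```
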
```diff
@@ -129,12 +132,14 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
         # add the corresponding offset
         argmin = (
             s[: block_size * offset.shape[0]]
-            .values.reshape(-1, block_size).argmin(axis=1)
+            .values.reshape(-1, block_size)
+            .argmin(axis=1)
             + offset
         )
         argmax = (
-            s[argmax_offset: block_size * offset.shape[0] + argmax_offset]
-            .values.reshape(-1, block_size).argmax(axis=1)
+            s[argmax_offset : block_size * offset.shape[0] + argmax_offset]
+            .values.reshape(-1, block_size)
+            .argmax(axis=1)
             + offset
             + argmax_offset
         )
@@ -150,7 +155,7 @@ class MinMaxAggregator(AbstractSeriesAggregator):
     .. note::
         This method is rather efficient when scaling to large data sizes and can be used
         as a data-reduction step before feeding it to the :class:`LTTB <LTTB>`
-        algorithm, as :class:`EfficientLTTB <EfficientLTTB>` does with the
+        algorithm, as :class:`EfficientLTTB <EfficientLTTB>` does with the
         :class:`MinMaxOverlapAggregator <MinMaxOverlapAggregator>`.
 
     """
@@ -173,22 +178,35 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
         block_size = math.ceil(s.shape[0] / n_out * 2)
 
         # Calculate the offset range which will be added to the argmin and argmax pos
-        offset = np.arange(
-            0, stop=s.shape[0] - block_size, step=block_size
-        )
+        offset = np.arange(0, stop=s.shape[0] - block_size, step=block_size)
 
         # Calculate the argmin & argmax on the reshaped view of `s` &
         # add the corresponding offset
         argmin = (
             s[: block_size * offset.shape[0]]
-            .values.reshape(-1, block_size).argmin(axis=1)
+            .values.reshape(-1, block_size)
+            .argmin(axis=1)
             + offset
         )
         argmax = (
             s[: block_size * offset.shape[0]]
-            .values.reshape(-1, block_size).argmax(axis=1)
+            .values.reshape(-1, block_size)
+            .argmax(axis=1)
             + offset
         )
+
+        # Note: the implementation below flips the array to search from
+        # right-to-left (as min or max will always use the first same minimum
+        # item, i.e. the most left item)
+        # This however creates a large computational overhead -> we do not use
+        # this implementation and suggest using the minmaxaggregator.
+        # argmax = (
+        #     (block_size - 1)
+        #     - np.fliplr(
+        #         s[: block_size * offset.shape[0]].values.reshape(-1, block_size)
+        #     ).argmax(axis=1)
+        # ) + offset
+
         # Sort the argmin & argmax (where we append the first and last index item)
         # and then slice the original series on these indexes.
         return s.iloc[np.unique(np.concatenate((argmin, argmax, [0, s.shape[0] - 1])))]
```
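The reshape trick computes one argmin and one argmax per block in a single vectorized pass, and adding `offset` maps the per-block positions back onto `s`. The commented-out `fliplr` variant exists because `argmax` always returns the first (left-most) occurrence of a tied extremum; flipping returns the right-most one, at a noticeable cost. A sketch with invented sizes:

```python
import math

import numpy as np
import pandas as pd

s = pd.Series(np.random.randn(1_000))
n_out = 100
block_size = math.ceil(s.shape[0] / n_out * 2)                # 20
offset = np.arange(0, stop=s.shape[0] - block_size, step=block_size)

# One argmin/argmax per block in a single vectorized pass; `offset`
# maps the per-block positions back onto positions within `s`.
blocks = s[: block_size * offset.shape[0]].values.reshape(-1, block_size)
argmin = blocks.argmin(axis=1) + offset
argmax = blocks.argmax(axis=1) + offset

# Keep the first & last sample, deduplicate, sort, and slice.
out = s.iloc[np.unique(np.concatenate((argmin, argmax, [0, s.shape[0] - 1])))]

# Tie-breaking, the reason for the commented-out fliplr variant:
# argmax returns the left-most maximum; flipping yields the right-most.
row = np.array([[1, 5, 5, 2]])
print(row.argmax(axis=1))                                     # [1]
print((row.shape[1] - 1) - np.fliplr(row).argmax(axis=1))     # [2]
```
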
```diff
@@ -209,14 +227,18 @@ def __init__(self, interleave_gaps: bool = True):
             sampled data. A quantile-based approach is used to determine the gaps /
             irregularly sampled data. By default, True.
         """
-        self.lttb = LTTB(interleave_gaps=interleave_gaps)
-        self.minmax = MinMaxOverlapAggregator(interleave_gaps=interleave_gaps)
-        super().__init__(interleave_gaps, dtype_regex_list=None)
+        self.lttb = LTTB(interleave_gaps=False)
+        self.minmax = MinMaxOverlapAggregator(interleave_gaps=False)
+        super().__init__(
+            interleave_gaps,
+            dtype_regex_list=[rf"{dtype}\d*" for dtype in ["float", "int", "uint"]]
+            + ["category", "bool"],
+        )
 
     def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
         if s.shape[0] > n_out * 1_000:
             s = self.minmax._aggregate(s, n_out * 50)
-        return self.lttb.aggregate(s, n_out)
+        return self.lttb._aggregate(s, n_out)
 
 
 class EveryNthPoint(AbstractSeriesAggregator):
```
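Two fixes land in this hunk: the inner aggregators are now built with `interleave_gaps=False`, so gap interleaving and the dtype check happen exactly once in the outer wrapper (hence the added `dtype_regex_list`), and the inner call becomes `_aggregate` instead of the public `aggregate`, avoiding a second round of that processing. The two-stage strategy itself, sketched with a hypothetical stand-in aggregator (not the library classes):

```python
import numpy as np
import pandas as pd

# Hypothetical stand-in for both aggregators (NOT the library classes):
# any object exposing _aggregate(s, n_out) would do here.
class _StridedAggregator:
    def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
        step = max(1, s.shape[0] // n_out)
        return s.iloc[::step]

minmax = _StridedAggregator()
lttb = _StridedAggregator()

def efficient_aggregate(s: pd.Series, n_out: int) -> pd.Series:
    # Only prefilter when the series dwarfs the target: a cheap min/max-style
    # pass first shrinks the input, then the heavier LTTB-style pass runs.
    if s.shape[0] > n_out * 1_000:
        s = minmax._aggregate(s, n_out * 50)
    # The private _aggregate is called so gap interleaving / dtype checks
    # are not applied twice; the outer public aggregate() does them once.
    return lttb._aggregate(s, n_out)

out = efficient_aggregate(pd.Series(np.random.randn(2_000_000)), n_out=1_000)
assert out.shape[0] <= 50_000
```
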
```diff
@@ -249,7 +271,7 @@ class FuncAggregator(AbstractSeriesAggregator):
     """
 
     def __init__(
-        self, aggregation_func, interleave_gaps: bool = True, dtype_regex_list=None
+        self, aggregation_func, interleave_gaps: bool = True, dtype_regex_list=None
     ):
         """
         Parameters
```

plotly_resampler/figure_resampler/figure_resampler_interface.py

Lines changed: 6 additions & 11 deletions

```diff
@@ -602,20 +602,15 @@ def add_trace(
         trace.uid = uuid
 
         hf_x = (
-            trace["x"]
-            if hasattr(trace, "x") and hf_x is None
-            else hf_x.values
-            if isinstance(hf_x, pd.Series)
-            else hf_x
+            trace["x"] if hasattr(trace, "x") and hf_x is None
+            else hf_x.values if isinstance(hf_x, pd.Series)
+            else hf_x if isinstance(hf_x, pd.Index)
+            else np.asarray(hf_x)
         )
-        if isinstance(hf_x, tuple):
-            hf_x = list(hf_x)
 
         hf_y = (
-            trace["y"]
-            if hasattr(trace, "y") and hf_y is None
-            else hf_y.values
-            if isinstance(hf_y, pd.Series)
+            trace["y"] if hasattr(trace, "y") and hf_y is None
+            else hf_y.values if isinstance(hf_y, (pd.Series, pd.Index))
             else hf_y
         )
         hf_y = np.asarray(hf_y)
```

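The rewritten conditional chain folds the old tuple special case into one expression: a `pd.Series` is unwrapped to its values, a `pd.Index` passes through untouched (it may carry timezone information), and anything else, tuples included, goes through `np.asarray`. The same normalization as a standalone helper; the function name is ours, and the `trace["x"]` fallback branch is omitted:

```python
import numpy as np
import pandas as pd

def normalize_hf_x(hf_x):
    """Hypothetical helper mirroring the hf_x chain above."""
    if isinstance(hf_x, pd.Series):
        return hf_x.values           # unwrap to the underlying ndarray
    if isinstance(hf_x, pd.Index):
        return hf_x                  # keep the Index as-is (dtype, tz intact)
    return np.asarray(hf_x)          # list, tuple, range, ndarray, ...

assert isinstance(normalize_hf_x((1, 2, 3)), np.ndarray)      # tuple handled
assert isinstance(normalize_hf_x(pd.Series([1.0])), np.ndarray)
idx = pd.date_range("2022-01-01", periods=3, tz="UTC")
assert normalize_hf_x(idx) is idx                             # Index untouched
```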

pyproject.toml

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "plotly-resampler"  # Do not forget to update the __init__.py __version__ variable
-version = "0.5.0"
+version = "0.6.3"
 description = "Visualizing large time series with plotly"
 authors = ["Jonas Van Der Donckt", "Jeroen Van Der Donckt", "Emiel Deprost"]
 readme = "README.md"
```

tests/conftest.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -59,7 +59,7 @@ def cat_series() -> pd.Series:
 
 @pytest.fixture
 def bool_series() -> pd.Series:
-    bool_list = [True, False, True, True, True, True]
+    bool_list = [True, False, True, True, True, True] + [True] * 50
     return pd.Series(bool_list * (_nb_samples // len(bool_list)), dtype="bool")
```
