@@ -44,7 +44,10 @@ class LTTB(AbstractSeriesAggregator):
4444
4545 """
4646
47- def __init__ (self , interleave_gaps : bool = True , ):
47+ def __init__ (
48+ self ,
49+ interleave_gaps : bool = True ,
50+ ):
4851 """
4952 Parameters
5053 ----------
@@ -56,7 +59,7 @@ def __init__(self, interleave_gaps: bool = True, ):
5659 super ().__init__ (
5760 interleave_gaps ,
5861 dtype_regex_list = [rf"{ dtype } \d*" for dtype in ["float" , "int" , "uint" ]]
59- + ["category" , "bool" ],
62+ + ["category" , "bool" ],
6063 )
6164
6265 def _aggregate (self , s : pd .Series , n_out : int ) -> pd .Series :
@@ -66,11 +69,11 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
6669 s_i = s .index .values
6770
6871 if s_i .dtype .type == np .datetime64 :
69- # lttbc does not support this datatype -> convert to int
72+ # lttbc does not support this datatype -> convert to int
7073 # (where the time is represented in ns)
7174 s_i = s_i .astype (int )
7275 idx , data = lttbc .downsample (s_i , s_v , n_out )
73- idx = pd .to_datetime (idx , unit = 'ns' , utc = True ).tz_convert (s .index .tz )
76+ idx = pd .to_datetime (idx , unit = "ns" , utc = True ).tz_convert (s .index .tz )
7477 else :
7578 idx , data = lttbc .downsample (s_i , s_v , n_out )
7679 idx = idx .astype (s_i .dtype )
@@ -129,12 +132,14 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
129132 # add the corresponding offset
130133 argmin = (
131134 s [: block_size * offset .shape [0 ]]
132- .values .reshape (- 1 , block_size ).argmin (axis = 1 )
135+ .values .reshape (- 1 , block_size )
136+ .argmin (axis = 1 )
133137 + offset
134138 )
135139 argmax = (
136- s [argmax_offset : block_size * offset .shape [0 ] + argmax_offset ]
137- .values .reshape (- 1 , block_size ).argmax (axis = 1 )
140+ s [argmax_offset : block_size * offset .shape [0 ] + argmax_offset ]
141+ .values .reshape (- 1 , block_size )
142+ .argmax (axis = 1 )
138143 + offset
139144 + argmax_offset
140145 )
@@ -150,7 +155,7 @@ class MinMaxAggregator(AbstractSeriesAggregator):
150155 .. note::
151156 This method is rather efficient when scaling to large data sizes and can be used
152157 as a data-reduction step before feeding it to the :class:`LTTB <LTTB>`
153- algorithm, as :class:`EfficientLTTB <EfficientLTTB>` does with the
158+ algorithm, as :class:`EfficientLTTB <EfficientLTTB>` does with the
154159 :class:`MinMaxOverlapAggregator <MinMaxOverlapAggregator>`.
155160
156161 """
@@ -173,22 +178,35 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
173178 block_size = math .ceil (s .shape [0 ] / n_out * 2 )
174179
175180 # Calculate the offset range which will be added to the argmin and argmax pos
176- offset = np .arange (
177- 0 , stop = s .shape [0 ] - block_size , step = block_size
178- )
181+ offset = np .arange (0 , stop = s .shape [0 ] - block_size , step = block_size )
179182
180183 # Calculate the argmin & argmax on the reshaped view of `s` &
181184 # add the corresponding offset
182185 argmin = (
183186 s [: block_size * offset .shape [0 ]]
184- .values .reshape (- 1 , block_size ).argmin (axis = 1 )
187+ .values .reshape (- 1 , block_size )
188+ .argmin (axis = 1 )
185189 + offset
186190 )
187191 argmax = (
188192 s [: block_size * offset .shape [0 ]]
189- .values .reshape (- 1 , block_size ).argmax (axis = 1 )
193+ .values .reshape (- 1 , block_size )
194+ .argmax (axis = 1 )
190195 + offset
191196 )
197+
198+ # Note: the implementation below flips the array to search from
199+ # right to left (as argmin / argmax always return the first of equal extrema,
200+ # i.e. the leftmost item)
201+ # This however creates a large computational overhead -> we do not use this
202+ # implementation and suggest using the minmaxaggregator.
203+ # argmax = (
204+ # (block_size - 1)
205+ # - np.fliplr(
206+ # s[: block_size * offset.shape[0]].values.reshape(-1, block_size)
207+ # ).argmax(axis=1)
208+ # ) + offset
209+
192210 # Sort the argmin & argmax (where we append the first and last index item)
193211 # and then slice the original series on these indexes.
194212 return s .iloc [np .unique (np .concatenate ((argmin , argmax , [0 , s .shape [0 ] - 1 ])))]
@@ -209,14 +227,18 @@ def __init__(self, interleave_gaps: bool = True):
209227 sampled data. A quantile-based approach is used to determine the gaps /
210228 irregularly sampled data. By default, True.
211229 """
212- self .lttb = LTTB (interleave_gaps = interleave_gaps )
213- self .minmax = MinMaxOverlapAggregator (interleave_gaps = interleave_gaps )
214- super ().__init__ (interleave_gaps , dtype_regex_list = None )
230+ self .lttb = LTTB (interleave_gaps = False )
231+ self .minmax = MinMaxOverlapAggregator (interleave_gaps = False )
232+ super ().__init__ (
233+ interleave_gaps ,
234+ dtype_regex_list = [rf"{ dtype } \d*" for dtype in ["float" , "int" , "uint" ]]
235+ + ["category" , "bool" ],
236+ )
215237
216238 def _aggregate (self , s : pd .Series , n_out : int ) -> pd .Series :
217239 if s .shape [0 ] > n_out * 1_000 :
218240 s = self .minmax ._aggregate (s , n_out * 50 )
219- return self .lttb .aggregate (s , n_out )
241+ return self .lttb ._aggregate (s , n_out )
220242
221243
222244class EveryNthPoint (AbstractSeriesAggregator ):
@@ -249,7 +271,7 @@ class FuncAggregator(AbstractSeriesAggregator):
249271 """
250272
251273 def __init__ (
252- self , aggregation_func , interleave_gaps : bool = True , dtype_regex_list = None
274+ self , aggregation_func , interleave_gaps : bool = True , dtype_regex_list = None
253275 ):
254276 """
255277 Parameters
0 commit comments