99
1010class DownsampleInterface (ABC ):
1111
12- def __init__ (self ) -> None :
13- super (). __init__ ()
12+ def __init__ (self , name : str ) -> None :
13+ self . name = name
1414
1515 @staticmethod
1616 def _construct_output_series (s : pd .Series , idxs : np .ndarray ) -> pd .Series :
@@ -29,7 +29,7 @@ def _supports_dtype(self, s: pd.Series):
2929 f"{ s .dtype } doesn't match with any regex in { self .dtype_regex_list } "
3030 )
3131
32- def downsample (self , s : pd .Series , n_out : int , parallel : bool = False ) -> pd .Series
32+ def downsample (self , s : pd .Series , n_out : int , parallel : bool = False ) -> pd .Series :
3333 """Downsample a pandas series to n_out samples.
3434
3535 Parameters
@@ -47,6 +47,9 @@ def downsample(self, s: pd.Series, n_out: int, parallel: bool = False) -> pd.Ser
4747 The downsampled series.
4848 """
4949 raise NotImplementedError
50+
51+ def __repr__ (self ) -> str :
52+ return f"{ self .name } "
5053
5154# ------------------- Rust Downsample Interface -------------------
5255
@@ -59,14 +62,14 @@ def _switch_mod_with_y(y_dtype: np.dtype, mod: ModuleType, downsample_func: str
5962 ----------
6063 y_dtype : np.dtype
6164 The dtype of the y-data
62- mod : Module
65+ mod : ModuleType
6366 The module to select the appropriate function from
6467 downsample_func : str, optional
6568 The name of the function to use, by default DOWNSAMPLE_FUNC.
6669 """
6770 # FLOATS
6871 if np .issubdtype (y_dtype , np .floating ):
69- if y . dtype == np .float16 :
72+ if y_dtype == np .float16 :
7073 return getattr (mod , downsample_func + '_f16' )
7174 elif y_dtype == np .float32 :
7275 return getattr (mod , downsample_func + '_f32' )
@@ -105,33 +108,33 @@ def _switch_mod_with_x_and_y(x_dtype: np.dtype, y_dtype: np.dtype, mod: ModuleTy
105108 The dtype of the x-data
106109 y_dtype : np.dtype
107110 The dtype of the y-data
108- mod : Module
111+ mod : ModuleType
109112 The module to select the appropriate function from
110113 """
111114 # FLOATS
112115 if np .issubdtype (x_dtype , np .floating ):
113116 if x_dtype == np .float16 :
114- return switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _f16' )
117+ return _switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _f16' )
115118 elif x_dtype == np .float32 :
116- return switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _f32' )
119+ return _switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _f32' )
117120 elif x_dtype == np .float64 :
118- return switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _f64' )
121+ return _switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _f64' )
119122 # INTS
120123 elif np .issubdtype (x_dtype , np .integer ):
121124 if x_dtype == np .int16 :
122- return switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _i16' )
125+ return _switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _i16' )
123126 elif x_dtype == np .int32 :
124- return switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _i32' )
127+ return _switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _i32' )
125128 elif x_dtype == np .int64 :
126- return switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _i64' )
129+ return _switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _i64' )
127130 # UINTS
128131 elif np .issubdtype (x_dtype , np .unsignedinteger ):
129132 if x_dtype == np .uint16 :
130- return switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _u16' )
133+ return _switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _u16' )
131134 elif x_dtype == np .uint32 :
132- return switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _u32' )
135+ return _switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _u32' )
133136 elif x_dtype == np .uint64 :
134- return switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _u64' )
137+ return _switch_mod_with_y (y_dtype , mod , f'{ DOWNSAMPLE_F } _u64' )
135138 # BOOLS
136139 # TODO: support bools
137140 # elif data_dtype == np.bool:
@@ -140,14 +143,25 @@ def _switch_mod_with_x_and_y(x_dtype: np.dtype, y_dtype: np.dtype, mod: ModuleTy
140143
141144class RustDownsamplingInterface (DownsampleInterface ):
142145
def __init__(self, name: str, resampling_mod: ModuleType) -> None:
    """Bind the downsampler to the sub-modules of a compiled module.

    Parameters
    ----------
    name : str
        Label of the downsampler; ``" [tsdownsample_rs]"`` is appended before
        it is handed to the parent initialiser.
    resampling_mod : ModuleType
        Module that must expose a ``scalar`` sub-module and may additionally
        expose ``simd``, ``simd_parallel`` and ``scalar_parallel``.
    """
    super().__init__(name + " [tsdownsample_rs]")
    self.rust_mod = resampling_mod

    # Single-core backend: `scalar` is the baseline, `simd` wins when present.
    single_core = resampling_mod.scalar
    if hasattr(resampling_mod, "simd"):
        single_core = resampling_mod.simd
    self.mod_single_core = single_core

    # Multi-core backend: first available of the SIMD and scalar variants,
    # or None when the module ships no parallel implementation at all.
    multi_core = None
    for candidate in ("simd_parallel", "scalar_parallel"):
        if hasattr(resampling_mod, candidate):
            multi_core = getattr(resampling_mod, candidate)
            break
    self.mod_multi_core = multi_core
164+
151165
152166 def _downsample_without_x (self , s : pd .Series , n_out : int ) -> pd .Series :
153167 downsample_method = _switch_mod_with_y (s .dtype , self .mod_single_core )
@@ -170,7 +184,10 @@ def _downsample_with_x_parallel(self, s: pd.Series, n_out: int) -> pd.Series:
170184 return self ._construct_output_series (s , idxs )
171185
172186 def downsample (self , s : pd .Series , n_out : int , parallel : bool = False ) -> pd .Series :
173- if s .index .freq is None : # TODO: or the other way around??
187+ fixed_sr = False
188+ if isinstance (s .index , pd .RangeIndex ) or s .index .freq is not None :
189+ fixed_sr = True
190+ if fixed_sr : # TODO: or the other way around??
174191 if parallel :
175192 return self ._downsample_without_x_parallel (s , n_out )
176193 else :
@@ -183,10 +200,39 @@ def downsample(self, s: pd.Series, n_out: int, parallel: bool = False) -> pd.Ser
183200
184201# ------------------ Numpy Downsample Interface ------------------
185202
class FuncDownsamplingInterface(DownsampleInterface):
    """Downsampler that aggregates chunks of a series with a user callable.

    Series with a ``pd.DatetimeIndex`` are aggregated over equally long time
    bins; every other series is aggregated over groups of consecutive rows.
    """

    def __init__(self, name: str, downsample_func: Callable) -> None:
        """Store the aggregation callable.

        Parameters
        ----------
        name : str
            Label of the downsampler; stored with a ``"[Func]_"`` prefix.
        downsample_func : Callable
            Aggregation that is applied to every bin / group of samples.
        """
        super().__init__("[Func]_" + name)
        self.downsample_func = downsample_func

    def downsample(self, s: pd.Series, n_out: int, parallel: bool = False) -> pd.Series:
        """Aggregate ``s`` into roughly ``n_out`` values.

        Parameters
        ----------
        s : pd.Series
            The series to downsample.
        n_out : int
            The targeted number of output samples.
        parallel : bool, optional
            Accepted for signature compatibility with the other downsamplers;
            this implementation does not use it.

        Returns
        -------
        pd.Series
            The aggregated series; bins / groups whose aggregate is NaN are
            dropped.
        """
        if len(s) == 0:
            # Nothing to aggregate; avoids indexing into empty arrays below.
            return s

        # A single timestamp spans no time, so no bin width can be derived
        # from it; such a series is handled by the positional grouping below.
        if isinstance(s.index, pd.DatetimeIndex) and len(s) > 1:
            t_start, t_end = s.index[0], s.index[-1]
            rate = (t_end - t_start) / n_out
            return s.resample(rate).apply(self.downsample_func).dropna()

        # No usable time span -> use the every-nth heuristic.
        # The group size is kept an integer: it serves as a repeat count and
        # as a multiplier for row positions.
        group_size = max(1, int(np.ceil(len(s) / n_out)))
        s_out = (
            s.groupby(
                # Group labels [0, 0, ..., 1, 1, ..., n_out - 1, n_out - 1]:
                # every label is repeated `group_size` times and the result is
                # truncated to the length of `s`.
                by=np.repeat(np.arange(n_out), group_size)[: len(s)]
            )
            .agg(self.downsample_func)
            .dropna()
        )

        # Create an index-estimation for real-time data:
        # adding one and multiplying by the group size yields the row position
        # just past each group, i.e. the end of its window.
        # TODO: add option to select start / middle / end as index
        idx_locs = (np.arange(len(s_out)) + 1) * group_size
        if idx_locs.size:
            # The last window may be shorter; clamp it to the final row.
            idx_locs[-1] = len(s) - 1

        return pd.Series(
            # `idx_locs` holds integer row positions, so it can index the
            # original index directly regardless of that index's dtype.
            index=s.index[idx_locs],
            data=s_out.values,
            name=str(s.name),
            copy=False,
        )
0 commit comments