Skip to content

Commit ec2f80a

Browse files
jvdd and jonasvdd authored
✨ new interface + ♻️ major code updates (#6)
* 🚧 WIP * 🚧 * 🚧 * 🚧 * ✨ new interface * 🧹 * 🧹 formatting * 🧹 fix linting * 🚀 new alpha release (#7) * 🚧 WIP * 🧹 cleanup type hints * 🧹 * 🐛 use multi-core if parallel=True * 🖊️ update based on code review * 🖍️ code review * 🙈 formatting * 🔥 support datetime64 * ✨ support int8, uint8, & bool * 🐛 fix overflow issue * 🧹 cleanup tests * 🧹 cleanup readme for first main release * ☕ support datetime64 as y as well * ✨ support timedelta64 y * ☎️ support timedelta64 x * 🎨 update readme Co-authored-by: jonasvdd <[email protected]>
1 parent ff3cc2c commit ec2f80a

File tree

14 files changed

+837
-356
lines changed

14 files changed

+837
-356
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
.vscode/*
12
venv/
23
TODO.md
34
main.rs

Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ black = black tsdownsample tests
55
install:
66
pip install -e .
77

8+
.PHONY: install-dev-requirements
9+
install-dev-requirements:
10+
pip install -r tests/requirements.txt
11+
pip install -r tests/requirements-linting.txt
12+
813
.PHONY: format
914
format:
1015
$(isort)

README.md

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
[![PyPI Latest Release](https://img.shields.io/pypi/v/tsdownsample.svg)](https://pypi.org/project/tsdownsample/)
44
[![support-version](https://img.shields.io/pypi/pyversions/tsdownsample)](https://img.shields.io/pypi/pyversions/tsdownsample)
55
[![Downloads](https://pepy.tech/badge/tsdownsample)](https://pepy.tech/project/tsdownsample)
6-
<!-- [![Testing](https://github.com/predict-idlab/tsflex/actions/workflows/test.yml/badge.svg)](https://github.com/predict-idlab/tsflex/actions/workflows/test.yml) -->
6+
[![Testing](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-downsample_rs.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-downsample_rs.yml)
7+
[![Testing](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-tsdownsample.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-tsdownsample.yml)
8+
<!-- TODO: codecov -->
79

810
**📈 Time series downsampling** algorithms for visualization
911

@@ -25,20 +27,21 @@
2527
- works on views of the data (no copies)
2628
- no intermediate data structures are created
2729
* **Flexible**: works on any type of data
28-
- supported datatypes are `f16`, `f32`, `f64`, `i16`, `i32`, `i64`, `u16`, `u32`, `u64`
30+
- supported datatypes are
31+
- for `x`: `f16`, `f32`, `f64`, `i16`, `i32`, `i64`, `u16`, `u32`, `u64`, `datetime64`
32+
- for `y`: `f16`, `f32`, `f64`, `i8`, `i16`, `i32`, `i64`, `u8`, `u16`, `u32`, `u64`, `bool`
2933
<details>
3034
<summary><i>!! 🚀 <code>f16</code> <a href="https://github.com/jvdd/argminmax">argminmax</a> is 200-300x faster than numpy</i></summary>
3135
In contrast with all other data types above, <code>f16</code> is *not* hardware supported (i.e., no instructions for f16) by most modern CPUs!! <br>
32-
🐌 Programming languages facilitate support for this datatype by either (i) upcasting to `f32` or (ii) using a software implementation. <br>
36+
🐌 Programming languages facilitate support for this datatype by either (i) upcasting to <u>f32</u> or (ii) using a software implementation. <br>
3337
💡 As for argminmax, only comparisons are needed - and thus no arithmetic operations - creating a <u>symmetrical ordinal mapping from <code>f16</code> to <code>i16</code></u> is sufficient. This mapping allows the use of hardware-supported scalar and SIMD <code>i16</code> instructions - while not producing any memory overhead 🎉 <br>
3438
<i>More details are described in <a href="https://github.com/jvdd/argminmax/pull/1">argminmax PR #1</a>.</i>
3539
</details>
3640
* **Easy to use**: simple & flexible API
3741

3842
## Install
3943

40-
> ❗🚨❗ This package is currently under development - no stable release yet ❗🚨❗
41-
44+
> ❗🚨❗ This package is currently under development - correct installation is not yet guaranteed ❗🚨❗
4245
4346
```bash
4447
pip install tsdownsample
@@ -48,16 +51,25 @@ pip install tsdownsample
4851

4952
```python
5053
from tsdownsample import MinMaxLTTBDownsampler
51-
import pandas as pd; import numpy as np
54+
import numpy as np
5255

5356
# Create a time series
5457
y = np.random.randn(10_000_000)
55-
s = pd.Series(y)
58+
x = np.arange(len(y))
59+
60+
# Downsample to 1000 points (assuming constant sampling rate)
61+
s_ds = MinMaxLTTBDownsampler().downsample(y, n_out=1000)
5662

57-
# Downsample to 1000 points
58-
s_ds = MinMaxLTTBDownsampler.downsample(s, n_out=1000)
63+
# Downsample to 1000 points using the (possibly irregularly spaced) x-data
64+
s_ds = MinMaxLTTBDownsampler().downsample(x, y, n_out=1000)
5965
```
6066

67+
## Limitations
68+
69+
Assumes:
70+
(i) x-data is monotonically increasing (i.e., sorted)
71+
(ii) no NaNs in the data
72+
6173
---
6274

6375
<p align="center">

downsample_rs/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ license = "MIT"
88

99
[dependencies]
1010
ndarray = {version = "0.15.6", default-features = false, features = ["rayon"] }
11-
argminmax = { version = "0.2.1" , features = ["half"] }
11+
argminmax = { version = "0.3" , features = ["half"] }
1212
half = { version = "2.1", default-features = false , optional = true}
1313

1414
[dev-dependencies]

downsample_rs/src/lttb/scalar.rs

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,21 +23,24 @@ pub fn lttb<Tx: Num, Ty: Num>(x: ArrayView1<Tx>, y: ArrayView1<Ty>, n_out: usize
2323

2424
for i in 0..n_out - 2 {
2525
// Calculate point average for next bucket (containing c).
26-
let mut avg_x: Tx = Tx::default();
27-
let mut avg_y: Ty = Ty::default();
26+
// let mut avg_x: Tx = Tx::default();
27+
// let mut avg_y: Ty = Ty::default();
28+
// TODO: check the impact of using f64 (is necessary to avoid overflow)
29+
let mut avg_x: f64 = 0.0;
30+
let mut avg_y: f64 = 0.0;
2831

2932
let avg_range_start = (every * (i + 1) as f64) as usize + 1;
3033
let avg_range_end = cmp::min((every * (i + 2) as f64) as usize + 1, x.len());
3134

3235
for i in avg_range_start..avg_range_end {
33-
avg_x = avg_x + x[i];
34-
avg_y = avg_y + y[i];
36+
avg_x = avg_x + x[i].to_f64();
37+
avg_y = avg_y + y[i].to_f64();
3538
}
3639
// Slicing seems to be a lot slower
3740
// let avg_x: Tx = x.slice(s![avg_range_start..avg_range_end]).sum();
3841
// let avg_y: Ty = y.slice(s![avg_range_start..avg_range_end]).sum();
39-
let avg_x: f64 = avg_x.to_f64() / (avg_range_end - avg_range_start) as f64;
40-
let avg_y: f64 = avg_y.to_f64() / (avg_range_end - avg_range_start) as f64;
42+
let avg_x: f64 = avg_x / (avg_range_end - avg_range_start) as f64;
43+
let avg_y: f64 = avg_y / (avg_range_end - avg_range_start) as f64;
4144

4245
// Get the range for this bucket
4346
let range_offs = (every * i as f64) as usize + 1;
@@ -93,17 +96,19 @@ pub fn lttb_without_x<Ty: Num>(y: ArrayView1<Ty>, n_out: usize) -> Array1<usize>
9396

9497
for i in 0..n_out - 2 {
9598
// Calculate point average for next bucket (containing c).
96-
let mut avg_y: Ty = Ty::default();
99+
// let mut avg_y: Ty = Ty::default();
100+
// TODO: check impact of using f64 (is necessary to avoid overflow)
101+
let mut avg_y: f64 = 0.0;
97102

98103
let avg_range_start = (every * (i + 1) as f64) as usize + 1;
99104
let avg_range_end = cmp::min((every * (i + 2) as f64) as usize + 1, y.len());
100105

101106
for i in avg_range_start..avg_range_end {
102-
avg_y = avg_y + y[i];
107+
avg_y = avg_y + y[i].to_f64();
103108
}
104109
// Slicing seems to be a lot slower
105110
// let avg_x: Tx = x.slice(s![avg_range_start..avg_range_end]).sum();
106-
let avg_y: f64 = avg_y.to_f64() / (avg_range_end - avg_range_start) as f64;
111+
let avg_y: f64 = avg_y / (avg_range_end - avg_range_start) as f64;
107112
let avg_x: f64 = (avg_range_start + avg_range_end - 1) as f64 / 2.0;
108113

109114
// Get the range for this bucket

0 commit comments

Comments
 (0)