diff --git a/examples/verbose_example.ipynb b/examples/verbose_example.ipynb
index 643739f7..4985eba6 100644
--- a/examples/verbose_example.ipynb
+++ b/examples/verbose_example.ipynb
@@ -13,7 +13,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -69,7 +68,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -79,7 +77,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -103,7 +100,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -111,7 +107,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -121,7 +116,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -225,7 +219,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -233,7 +226,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -246,7 +238,16 @@
"cell_type": "code",
"execution_count": 7,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/snil/Documents/tsflex/tsflex/utils/logging.py:81: RuntimeWarning: Logging file (example_processing_logs.log) already exists. This file will be overwritten!\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ],
"source": [
"_ = series_pipe.process([df_gsr, df_acc, df_tmp], logging_file_path=\"example_processing_logs.log\")"
]
@@ -288,48 +289,48 @@
"
\n",
" \n",
" | 0 | \n",
- " 2023-05-05 12:16:53.596 | \n",
+ " 2023-08-16 12:45:29.484 | \n",
" clip_quantiles | \n",
" (TMP,) | \n",
" TMP | \n",
- " 0 days 00:00:00.002578633 | \n",
- " 4.73 | \n",
+ " 0 days 00:00:00.002462031 | \n",
+ " 7.67 | \n",
"
\n",
" \n",
" | 1 | \n",
- " 2023-05-05 12:16:53.597 | \n",
+ " 2023-08-16 12:45:29.486 | \n",
" savgol_filter | \n",
" (EDA,) | \n",
" EDA | \n",
- " 0 days 00:00:00.000956786 | \n",
- " 1.76 | \n",
+ " 0 days 00:00:00.001066185 | \n",
+ " 3.32 | \n",
"
\n",
" \n",
" | 2 | \n",
- " 2023-05-05 12:16:53.628 | \n",
+ " 2023-08-16 12:45:29.505 | \n",
" savgol_filter | \n",
" (ACC_x,), (ACC_y,), (ACC_z,) | \n",
" ACC_x, ACC_y, ACC_z | \n",
- " 0 days 00:00:00.030402122 | \n",
- " 55.82 | \n",
+ " 0 days 00:00:00.019186704 | \n",
+ " 59.79 | \n",
"
\n",
" \n",
" | 3 | \n",
- " 2023-05-05 12:16:53.635 | \n",
+ " 2023-08-16 12:45:29.510 | \n",
" smv | \n",
" (ACC_x, ACC_y, ACC_z) | \n",
" ACC_SMV | \n",
- " 0 days 00:00:00.006881422 | \n",
- " 12.63 | \n",
+ " 0 days 00:00:00.004629862 | \n",
+ " 14.43 | \n",
"
\n",
" \n",
" | 4 | \n",
- " 2023-05-05 12:16:53.649 | \n",
+ " 2023-08-16 12:45:29.514 | \n",
" clip_quantiles | \n",
" (ACC_SMV,) | \n",
" ACC_SMV | \n",
- " 0 days 00:00:00.013646172 | \n",
- " 25.05 | \n",
+ " 0 days 00:00:00.004746801 | \n",
+ " 14.79 | \n",
"
\n",
" \n",
"\n",
@@ -337,18 +338,18 @@
],
"text/plain": [
" log_time function series_names \\\n",
- "0 2023-05-05 12:16:53.596 clip_quantiles (TMP,) \n",
- "1 2023-05-05 12:16:53.597 savgol_filter (EDA,) \n",
- "2 2023-05-05 12:16:53.628 savgol_filter (ACC_x,), (ACC_y,), (ACC_z,) \n",
- "3 2023-05-05 12:16:53.635 smv (ACC_x, ACC_y, ACC_z) \n",
- "4 2023-05-05 12:16:53.649 clip_quantiles (ACC_SMV,) \n",
+ "0 2023-08-16 12:45:29.484 clip_quantiles (TMP,) \n",
+ "1 2023-08-16 12:45:29.486 savgol_filter (EDA,) \n",
+ "2 2023-08-16 12:45:29.505 savgol_filter (ACC_x,), (ACC_y,), (ACC_z,) \n",
+ "3 2023-08-16 12:45:29.510 smv (ACC_x, ACC_y, ACC_z) \n",
+ "4 2023-08-16 12:45:29.514 clip_quantiles (ACC_SMV,) \n",
"\n",
" output_names duration duration % \n",
- "0 TMP 0 days 00:00:00.002578633 4.73 \n",
- "1 EDA 0 days 00:00:00.000956786 1.76 \n",
- "2 ACC_x, ACC_y, ACC_z 0 days 00:00:00.030402122 55.82 \n",
- "3 ACC_SMV 0 days 00:00:00.006881422 12.63 \n",
- "4 ACC_SMV 0 days 00:00:00.013646172 25.05 "
+ "0 TMP 0 days 00:00:00.002462031 7.67 \n",
+ "1 EDA 0 days 00:00:00.001066185 3.32 \n",
+ "2 ACC_x, ACC_y, ACC_z 0 days 00:00:00.019186704 59.79 \n",
+ "3 ACC_SMV 0 days 00:00:00.004629862 14.43 \n",
+ "4 ACC_SMV 0 days 00:00:00.004746801 14.79 "
]
},
"execution_count": 8,
@@ -361,7 +362,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -369,7 +369,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {
"tags": []
@@ -382,16 +381,7 @@
"cell_type": "code",
"execution_count": 9,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/jeroen/.cache/pypoetry/virtualenvs/tsflex-5Y4iXlk8-py3.10/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"try: \n",
" from tsflex.features import FeatureCollection, FuncWrapper\n",
@@ -406,7 +396,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {
"tags": []
@@ -416,7 +405,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -515,7 +503,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {
"tags": []
@@ -525,7 +512,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -551,7 +537,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {
"tags": []
@@ -561,7 +546,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -574,7 +558,6 @@
"cell_type": "code",
"execution_count": 11,
"metadata": {
- "collapsed": false,
"pycharm": {
"name": "#%%\n"
}
@@ -610,22 +593,22 @@
" \n",
" \n",
" \n",
- " | 2017-06-13 15:41:21+02:00 | \n",
- " 34.369999 | \n",
+ " 2017-06-13 14:28:02.500000+02:00 | \n",
+ " 30.35 | \n",
"
\n",
" \n",
- " | 2017-06-13 15:00:18.750000+02:00 | \n",
- " 35.500000 | \n",
+ " 2017-06-13 15:49:32.500000+02:00 | \n",
+ " 33.68 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " TMP\n",
- "timestamp \n",
- "2017-06-13 15:41:21+02:00 34.369999\n",
- "2017-06-13 15:00:18.750000+02:00 35.500000"
+ " TMP\n",
+ "timestamp \n",
+ "2017-06-13 14:28:02.500000+02:00 30.35\n",
+ "2017-06-13 15:49:32.500000+02:00 33.68"
]
},
"execution_count": 11,
@@ -638,7 +621,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -753,36 +735,36 @@
" \n",
" \n",
" \n",
- " | 2017-06-13 15:19:13+02:00 | \n",
- " 35.029999 | \n",
- " 34.930000 | \n",
- " 8394.200195 | \n",
- " -0.810092 | \n",
- " 34.975834 | \n",
- " 34.950001 | \n",
- " 34.990002 | \n",
- " 34.990002 | \n",
- " 34.975845 | \n",
- " -0.058798 | \n",
- " -0.000345 | \n",
- " 0.028884 | \n",
- " 0.000834 | \n",
- "
\n",
- " \n",
- " | 2017-06-13 16:17:43+02:00 | \n",
- " 30.250000 | \n",
- " 29.809999 | \n",
- " 7206.559570 | \n",
- " -1.437857 | \n",
- " 30.027332 | \n",
- " 29.889999 | \n",
- " 30.030001 | \n",
- " 30.170000 | \n",
- " 30.027679 | \n",
- " 0.071025 | \n",
- " 0.001219 | \n",
- " 0.144382 | \n",
- " 0.020846 | \n",
+ " 2017-06-13 14:38:43+02:00 | \n",
+ " 32.790001 | \n",
+ " 32.290001 | \n",
+ " 7803.440430 | \n",
+ " -1.432049 | \n",
+ " 32.514336 | \n",
+ " 32.330002 | \n",
+ " 32.510000 | \n",
+ " 32.660000 | \n",
+ " 32.514774 | \n",
+ " 0.209048 | \n",
+ " 0.002374 | \n",
+ " 0.16899 | \n",
+ " 0.028558 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 15:17:13+02:00 | \n",
+ " 34.610001 | \n",
+ " 34.430000 | \n",
+ " 8278.279297 | \n",
+ " -0.480542 | \n",
+ " 34.492832 | \n",
+ " 34.450001 | \n",
+ " 34.470001 | \n",
+ " 34.529999 | \n",
+ " 34.492874 | \n",
+ " 0.847968 | \n",
+ " 0.000680 | \n",
+ " 0.05345 | \n",
+ " 0.002857 | \n",
"
\n",
" \n",
"\n",
@@ -791,33 +773,33 @@
"text/plain": [
" TMP__amax__w=1m TMP__amin__w=1m TMP__area__w=1m \\\n",
"timestamp \n",
- "2017-06-13 15:19:13+02:00 35.029999 34.930000 8394.200195 \n",
- "2017-06-13 16:17:43+02:00 30.250000 29.809999 7206.559570 \n",
+ "2017-06-13 14:38:43+02:00 32.790001 32.290001 7803.440430 \n",
+ "2017-06-13 15:17:13+02:00 34.610001 34.430000 8278.279297 \n",
"\n",
" TMP__kurtosis__w=1m TMP__mean__w=1m \\\n",
"timestamp \n",
- "2017-06-13 15:19:13+02:00 -0.810092 34.975834 \n",
- "2017-06-13 16:17:43+02:00 -1.437857 30.027332 \n",
+ "2017-06-13 14:38:43+02:00 -1.432049 32.514336 \n",
+ "2017-06-13 15:17:13+02:00 -0.480542 34.492832 \n",
"\n",
" TMP__quantile_0.25__w=1m TMP__quantile_0.5__w=1m \\\n",
"timestamp \n",
- "2017-06-13 15:19:13+02:00 34.950001 34.990002 \n",
- "2017-06-13 16:17:43+02:00 29.889999 30.030001 \n",
+ "2017-06-13 14:38:43+02:00 32.330002 32.510000 \n",
+ "2017-06-13 15:17:13+02:00 34.450001 34.470001 \n",
"\n",
" TMP__quantile_0.75__w=1m TMP__rms__w=1m \\\n",
"timestamp \n",
- "2017-06-13 15:19:13+02:00 34.990002 34.975845 \n",
- "2017-06-13 16:17:43+02:00 30.170000 30.027679 \n",
+ "2017-06-13 14:38:43+02:00 32.660000 32.514774 \n",
+ "2017-06-13 15:17:13+02:00 34.529999 34.492874 \n",
"\n",
" TMP__skew__w=1m TMP__slope__w=1m TMP__std__w=1m \\\n",
"timestamp \n",
- "2017-06-13 15:19:13+02:00 -0.058798 -0.000345 0.028884 \n",
- "2017-06-13 16:17:43+02:00 0.071025 0.001219 0.144382 \n",
+ "2017-06-13 14:38:43+02:00 0.209048 0.002374 0.16899 \n",
+ "2017-06-13 15:17:13+02:00 0.847968 0.000680 0.05345 \n",
"\n",
" TMP__var__w=1m \n",
"timestamp \n",
- "2017-06-13 15:19:13+02:00 0.000834 \n",
- "2017-06-13 16:17:43+02:00 0.020846 "
+ "2017-06-13 14:38:43+02:00 0.028558 \n",
+ "2017-06-13 15:17:13+02:00 0.002857 "
]
},
"execution_count": 13,
@@ -838,7 +820,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -846,7 +827,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -854,7 +834,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -873,22 +852,22 @@
"EDA: (\n",
"\twin: 1h : [\n",
"\t\tFeatureDescriptor - func: FuncWrapper(mean, ['mean'], {}) stride: ['30s'],\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(skew, ['skew'], {}) stride: ['15s'],\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(kurtosis, ['kurtosis'], {}) stride: ['30s'],\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(sum, ['area'], {}) stride: ['30s'],\n",
- "\t]\n",
- "\twin: 30s : [\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(std, ['std'], {}) stride: ['15s'],\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(var, ['var'], {}) stride: ['15s'],\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(quantile, ['quantile_0.25', 'quantile_0.5', 'quantile_0.75'], {'q': [0.25, 0.5, 0.75]}) stride: ['30s'],\n",
"\t]\n",
"\twin: 1m30s : [\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(std, ['std'], {}) stride: ['30s'],\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(var, ['var'], {}) stride: ['30s'],\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(kurtosis, ['kurtosis'], {}) stride: ['15s'],\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(slope, ['slope'], {}) stride: ['15s'],\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(sum, ['area'], {}) stride: ['15s'],\n",
+ "\t]\n",
+ "\twin: 2m : [\n",
"\t\tFeatureDescriptor - func: FuncWrapper(amax, ['amax'], {}) stride: ['15s'],\n",
"\t\tFeatureDescriptor - func: FuncWrapper(amin, ['amin'], {}) stride: ['15s'],\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(, ['rms'], {}) stride: ['30s'],\n",
"\t]\n",
- "\twin: 2m : [\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(slope, ['slope'], {}) stride: ['15s'],\n",
+ "\twin: 30s : [\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(skew, ['skew'], {}) stride: ['15s'],\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(quantile, ['quantile_0.25', 'quantile_0.5', 'quantile_0.75'], {'q': [0.25, 0.5, 0.75]}) stride: ['30s'],\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(, ['rms'], {}) stride: ['15s'],\n",
"\t]\n",
")\n",
"\n",
@@ -916,19 +895,19 @@
" \n",
" \n",
" | \n",
- " EDA__amax__w=1m30s | \n",
- " EDA__amin__w=1m30s | \n",
- " EDA__area__w=1h | \n",
- " EDA__kurtosis__w=1h | \n",
+ " EDA__amax__w=2m | \n",
+ " EDA__amin__w=2m | \n",
+ " EDA__area__w=1m30s | \n",
+ " EDA__kurtosis__w=1m30s | \n",
" EDA__mean__w=1h | \n",
" EDA__quantile_0.25__w=30s | \n",
" EDA__quantile_0.5__w=30s | \n",
" EDA__quantile_0.75__w=30s | \n",
- " EDA__rms__w=1m30s | \n",
- " EDA__skew__w=1h | \n",
- " EDA__slope__w=2m | \n",
- " EDA__std__w=30s | \n",
- " EDA__var__w=30s | \n",
+ " EDA__rms__w=30s | \n",
+ " EDA__skew__w=30s | \n",
+ " EDA__slope__w=1m30s | \n",
+ " EDA__std__w=1m30s | \n",
+ " EDA__var__w=1m30s | \n",
"
\n",
" \n",
" | timestamp | \n",
@@ -949,76 +928,76 @@
"
\n",
" \n",
" \n",
- " | 2017-06-13 16:09:43+02:00 | \n",
- " 1.207757 | \n",
- " 1.139973 | \n",
- " 30162.832031 | \n",
- " -0.272705 | \n",
- " 2.094641 | \n",
- " 1.157878 | \n",
- " 1.160436 | \n",
- " 1.166831 | \n",
- " 1.168666 | \n",
- " 0.94309 | \n",
- " -0.000009 | \n",
- " 0.005837 | \n",
- " 0.000034 | \n",
- "
\n",
- " \n",
- " | 2017-06-13 15:02:58+02:00 | \n",
- " 0.780491 | \n",
- " 0.703754 | \n",
- " NaN | \n",
- " NaN | \n",
+ " 2017-06-13 16:12:13+02:00 | \n",
+ " 1.220547 | \n",
+ " 1.012079 | \n",
+ " 398.828674 | \n",
+ " 7.343093 | \n",
+ " 2.056634 | \n",
+ " 1.082421 | \n",
+ " 1.095849 | \n",
+ " 1.106721 | \n",
+ " 1.095335 | \n",
+ " -0.420529 | \n",
+ " -0.000089 | \n",
+ " 0.016636 | \n",
+ " 0.000277 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:52:58+02:00 | \n",
+ " 0.950591 | \n",
+ " 0.777933 | \n",
+ " 306.619629 | \n",
+ " -1.330574 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
+ " 0.814525 | \n",
+ " 0.054707 | \n",
+ " -0.000287 | \n",
" NaN | \n",
" NaN | \n",
- " -0.000113 | \n",
- " 0.005096 | \n",
- " 0.000026 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " EDA__amax__w=1m30s EDA__amin__w=1m30s \\\n",
- "timestamp \n",
- "2017-06-13 16:09:43+02:00 1.207757 1.139973 \n",
- "2017-06-13 15:02:58+02:00 0.780491 0.703754 \n",
+ " EDA__amax__w=2m EDA__amin__w=2m \\\n",
+ "timestamp \n",
+ "2017-06-13 16:12:13+02:00 1.220547 1.012079 \n",
+ "2017-06-13 14:52:58+02:00 0.950591 0.777933 \n",
"\n",
- " EDA__area__w=1h EDA__kurtosis__w=1h \\\n",
- "timestamp \n",
- "2017-06-13 16:09:43+02:00 30162.832031 -0.272705 \n",
- "2017-06-13 15:02:58+02:00 NaN NaN \n",
+ " EDA__area__w=1m30s EDA__kurtosis__w=1m30s \\\n",
+ "timestamp \n",
+ "2017-06-13 16:12:13+02:00 398.828674 7.343093 \n",
+ "2017-06-13 14:52:58+02:00 306.619629 -1.330574 \n",
"\n",
" EDA__mean__w=1h EDA__quantile_0.25__w=30s \\\n",
"timestamp \n",
- "2017-06-13 16:09:43+02:00 2.094641 1.157878 \n",
- "2017-06-13 15:02:58+02:00 NaN NaN \n",
+ "2017-06-13 16:12:13+02:00 2.056634 1.082421 \n",
+ "2017-06-13 14:52:58+02:00 NaN NaN \n",
"\n",
" EDA__quantile_0.5__w=30s \\\n",
"timestamp \n",
- "2017-06-13 16:09:43+02:00 1.160436 \n",
- "2017-06-13 15:02:58+02:00 NaN \n",
+ "2017-06-13 16:12:13+02:00 1.095849 \n",
+ "2017-06-13 14:52:58+02:00 NaN \n",
"\n",
- " EDA__quantile_0.75__w=30s EDA__rms__w=1m30s \\\n",
- "timestamp \n",
- "2017-06-13 16:09:43+02:00 1.166831 1.168666 \n",
- "2017-06-13 15:02:58+02:00 NaN NaN \n",
+ " EDA__quantile_0.75__w=30s EDA__rms__w=30s \\\n",
+ "timestamp \n",
+ "2017-06-13 16:12:13+02:00 1.106721 1.095335 \n",
+ "2017-06-13 14:52:58+02:00 NaN 0.814525 \n",
"\n",
- " EDA__skew__w=1h EDA__slope__w=2m EDA__std__w=30s \\\n",
- "timestamp \n",
- "2017-06-13 16:09:43+02:00 0.94309 -0.000009 0.005837 \n",
- "2017-06-13 15:02:58+02:00 NaN -0.000113 0.005096 \n",
+ " EDA__skew__w=30s EDA__slope__w=1m30s \\\n",
+ "timestamp \n",
+ "2017-06-13 16:12:13+02:00 -0.420529 -0.000089 \n",
+ "2017-06-13 14:52:58+02:00 0.054707 -0.000287 \n",
"\n",
- " EDA__var__w=30s \n",
- "timestamp \n",
- "2017-06-13 16:09:43+02:00 0.000034 \n",
- "2017-06-13 15:02:58+02:00 0.000026 "
+ " EDA__std__w=1m30s EDA__var__w=1m30s \n",
+ "timestamp \n",
+ "2017-06-13 16:12:13+02:00 0.016636 0.000277 \n",
+ "2017-06-13 14:52:58+02:00 NaN NaN "
]
},
"execution_count": 14,
@@ -1052,7 +1031,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1094,7 +1072,7 @@
" \n",
" \n",
" | \n",
- " EDA__area__w=1h | \n",
+ " EDA__amax__w=2m | \n",
"
\n",
" \n",
" | timestamp | \n",
@@ -1103,27 +1081,27 @@
"
\n",
" \n",
" \n",
- " | 2017-06-13 15:26:43+02:00 | \n",
- " 15623.870117 | \n",
+ " 2017-06-13 15:55:28+02:00 | \n",
+ " 4.517331 | \n",
"
\n",
" \n",
- " | 2017-06-13 16:18:13+02:00 | \n",
- " 28911.509766 | \n",
+ " 2017-06-13 16:05:58+02:00 | \n",
+ " 1.471220 | \n",
"
\n",
" \n",
- " | 2017-06-13 16:19:43+02:00 | \n",
- " 28819.720703 | \n",
+ " 2017-06-13 15:51:43+02:00 | \n",
+ " 4.738588 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " EDA__area__w=1h\n",
+ " EDA__amax__w=2m\n",
"timestamp \n",
- "2017-06-13 15:26:43+02:00 15623.870117\n",
- "2017-06-13 16:18:13+02:00 28911.509766\n",
- "2017-06-13 16:19:43+02:00 28819.720703"
+ "2017-06-13 15:55:28+02:00 4.517331\n",
+ "2017-06-13 16:05:58+02:00 1.471220\n",
+ "2017-06-13 15:51:43+02:00 4.738588"
]
},
"execution_count": 15,
@@ -1138,7 +1116,633 @@
]
},
{
- "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Rounded start index feature extraction"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "When working with data that contain 'unclean' timestamps, it is possible to prettify the results by using the `exact_time` parameter.\n",
+ "\n",
+ "Here you can see some data with a very fine time resolution (sub-second timestamps)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ACC_x | \n",
+ " ACC_y | \n",
+ " ACC_z | \n",
+ "
\n",
+ " \n",
+ " | timestamp | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2017-06-13 14:22:13.937500+02:00 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 63 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:22:13.968750+02:00 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 63 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:22:14+02:00 | \n",
+ " -1 | \n",
+ " 4 | \n",
+ " 63 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:22:14.031250+02:00 | \n",
+ " -2 | \n",
+ " 4 | \n",
+ " 63 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:22:14.062500+02:00 | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 63 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ACC_x ACC_y ACC_z\n",
+ "timestamp \n",
+ "2017-06-13 14:22:13.937500+02:00 1 6 63\n",
+ "2017-06-13 14:22:13.968750+02:00 1 6 63\n",
+ "2017-06-13 14:22:14+02:00 -1 4 63\n",
+ "2017-06-13 14:22:14.031250+02:00 -2 4 63\n",
+ "2017-06-13 14:22:14.062500+02:00 0 5 63"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_acc.iloc[30:].head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "ACC_x: (\n",
+ "\twin: 30s : [\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(std, ['std'], {}) stride: ['10s'],\n",
+ "\t]\n",
+ ")"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "fc = FeatureCollection(\n",
+ " [\n",
+ "FeatureDescriptor(\n",
+ " series_name=\"ACC_x\",\n",
+ " window='30s',\n",
+ " stride='10s',\n",
+ " function=np.std,\n",
+ " )\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "fc"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ACC_x__std__w=30s | \n",
+ "
\n",
+ " \n",
+ " | timestamp | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2017-06-13 14:22:43.937500+02:00 | \n",
+ " 0.771801 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:22:53.937500+02:00 | \n",
+ " 0.799006 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:23:03.937500+02:00 | \n",
+ " 0.659779 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:23:13.937500+02:00 | \n",
+ " 0.624957 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:23:23.937500+02:00 | \n",
+ " 0.700799 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:13.937500+02:00 | \n",
+ " 18.077957 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:23.937500+02:00 | \n",
+ " 8.099124 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:33.937500+02:00 | \n",
+ " 14.798578 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:43.937500+02:00 | \n",
+ " 14.624782 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:53.937500+02:00 | \n",
+ " 13.951030 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
752 rows × 1 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ACC_x__std__w=30s\n",
+ "timestamp \n",
+ "2017-06-13 14:22:43.937500+02:00 0.771801\n",
+ "2017-06-13 14:22:53.937500+02:00 0.799006\n",
+ "2017-06-13 14:23:03.937500+02:00 0.659779\n",
+ "2017-06-13 14:23:13.937500+02:00 0.624957\n",
+ "2017-06-13 14:23:23.937500+02:00 0.700799\n",
+ "... ...\n",
+ "2017-06-13 16:27:13.937500+02:00 18.077957\n",
+ "2017-06-13 16:27:23.937500+02:00 8.099124\n",
+ "2017-06-13 16:27:33.937500+02:00 14.798578\n",
+ "2017-06-13 16:27:43.937500+02:00 14.624782\n",
+ "2017-06-13 16:27:53.937500+02:00 13.951030\n",
+ "\n",
+ "[752 rows x 1 columns]"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "fc.calculate(df_acc.iloc[30:])[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For example, passing `exact_time='1s'` rounds the index to a resolution of 1 second, as seen below.\n",
+ "\n",
+ "**Note:**\n",
+ "Rounding can produce (slightly) different results compared to not rounding the timestamps. The size of the difference also depends on the resolution used."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ACC_x__std__w=30s | \n",
+ "
\n",
+ " \n",
+ " | timestamp | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2017-06-13 14:22:44+02:00 | \n",
+ " 0.770055 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:22:54+02:00 | \n",
+ " 0.798542 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:23:04+02:00 | \n",
+ " 0.660551 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:23:14+02:00 | \n",
+ " 0.624135 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:23:24+02:00 | \n",
+ " 0.700799 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:14+02:00 | \n",
+ " 18.065915 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:24+02:00 | \n",
+ " 8.160273 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:34+02:00 | \n",
+ " 14.800646 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:44+02:00 | \n",
+ " 14.624782 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:54+02:00 | \n",
+ " 13.952225 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
752 rows × 1 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ACC_x__std__w=30s\n",
+ "timestamp \n",
+ "2017-06-13 14:22:44+02:00 0.770055\n",
+ "2017-06-13 14:22:54+02:00 0.798542\n",
+ "2017-06-13 14:23:04+02:00 0.660551\n",
+ "2017-06-13 14:23:14+02:00 0.624135\n",
+ "2017-06-13 14:23:24+02:00 0.700799\n",
+ "... ...\n",
+ "2017-06-13 16:27:14+02:00 18.065915\n",
+ "2017-06-13 16:27:24+02:00 8.160273\n",
+ "2017-06-13 16:27:34+02:00 14.800646\n",
+ "2017-06-13 16:27:44+02:00 14.624782\n",
+ "2017-06-13 16:27:54+02:00 13.952225\n",
+ "\n",
+ "[752 rows x 1 columns]"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "fc.calculate(df_acc.iloc[30:], exact_time='1s')[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For time indices, you can also use a timedelta object."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ACC_x__std__w=30s | \n",
+ "
\n",
+ " \n",
+ " | timestamp | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2017-06-13 14:22:50+02:00 | \n",
+ " 0.783929 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:23:00+02:00 | \n",
+ " 0.798185 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:23:10+02:00 | \n",
+ " 0.665362 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:23:20+02:00 | \n",
+ " 0.631974 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:23:30+02:00 | \n",
+ " 0.683508 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:20+02:00 | \n",
+ " 11.929327 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:30+02:00 | \n",
+ " 14.153599 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:40+02:00 | \n",
+ " 14.686610 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:50+02:00 | \n",
+ " 14.468111 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:28:00+02:00 | \n",
+ " 10.192261 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
752 rows × 1 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ACC_x__std__w=30s\n",
+ "timestamp \n",
+ "2017-06-13 14:22:50+02:00 0.783929\n",
+ "2017-06-13 14:23:00+02:00 0.798185\n",
+ "2017-06-13 14:23:10+02:00 0.665362\n",
+ "2017-06-13 14:23:20+02:00 0.631974\n",
+ "2017-06-13 14:23:30+02:00 0.683508\n",
+ "... ...\n",
+ "2017-06-13 16:27:20+02:00 11.929327\n",
+ "2017-06-13 16:27:30+02:00 14.153599\n",
+ "2017-06-13 16:27:40+02:00 14.686610\n",
+ "2017-06-13 16:27:50+02:00 14.468111\n",
+ "2017-06-13 16:28:00+02:00 10.192261\n",
+ "\n",
+ "[752 rows x 1 columns]"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "fc.calculate(df_acc.iloc[30:], exact_time=pd.Timedelta(10, 'seconds'))[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Alternatively, when `exact_time` is set to `False`, the index is rounded using the least common multiple (LCM) of the strides and window. (In this case, the first index will be rounded to a multiple of 30 seconds.)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ACC_x__std__w=30s | \n",
+ "
\n",
+ " \n",
+ " | timestamp | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2017-06-13 14:23:00+02:00 | \n",
+ " 0.798185 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:23:10+02:00 | \n",
+ " 0.665362 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:23:20+02:00 | \n",
+ " 0.631974 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:23:30+02:00 | \n",
+ " 0.683508 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 14:23:40+02:00 | \n",
+ " 0.629029 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:20+02:00 | \n",
+ " 11.929327 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:30+02:00 | \n",
+ " 14.153599 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:40+02:00 | \n",
+ " 14.686610 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:27:50+02:00 | \n",
+ " 14.468111 | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 16:28:00+02:00 | \n",
+ " 10.192261 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
751 rows × 1 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ACC_x__std__w=30s\n",
+ "timestamp \n",
+ "2017-06-13 14:23:00+02:00 0.798185\n",
+ "2017-06-13 14:23:10+02:00 0.665362\n",
+ "2017-06-13 14:23:20+02:00 0.631974\n",
+ "2017-06-13 14:23:30+02:00 0.683508\n",
+ "2017-06-13 14:23:40+02:00 0.629029\n",
+ "... ...\n",
+ "2017-06-13 16:27:20+02:00 11.929327\n",
+ "2017-06-13 16:27:30+02:00 14.153599\n",
+ "2017-06-13 16:27:40+02:00 14.686610\n",
+ "2017-06-13 16:27:50+02:00 14.468111\n",
+ "2017-06-13 16:28:00+02:00 10.192261\n",
+ "\n",
+ "[751 rows x 1 columns]"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "fc.calculate(df_acc.iloc[30:], exact_time=False)[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This functionality can also be used with sequence data, where the indices are numeric. In that case, the values of `window` and `stride` should be numeric, and the value of `exact_time` can be a `bool`, `int`, or `float`.\n",
+ "\n",
+ "The full description of this parameter can be found in this table:\n",
+ "\n",
+ "| index datatype | rounding datatype | return datatype | extra info |\n",
+ "| :------------- | :---------------- | --------------: | ----------------------------------------------------------------: |\n",
+ "| int | int | int | round `index` to nearest multiple of `rounding` |\n",
+ "| | float | float | round `index` to nearest multiple of `rounding` |\n",
+ "| | bool | float | round `index` to lcm of `window` and/or `stride` |\n",
+ "| float | int | float | round `index` to nearest multiple of `rounding` |\n",
+ "| | float | float | round `index` to nearest multiple of `rounding` |\n",
+ "| | bool | float | round `index` to lcm of `window` and/or `stride` |\n",
+ "| pd.Timestamp | str | pd.Timestamp | round `index` to resolution of `rounding` (e.g. '10s', '2m', 'h') |\n",
+ "| | bool | pd.Timestamp | round `index` to lcm of `window` and/or `stride` |\n",
+ "| | pd.Timedelta | pd.Timestamp | round `index` to nearest multiple of `rounding` |\n"
+ ]
+ },
+ {
"cell_type": "markdown",
"metadata": {
"tags": []
@@ -1161,22 +1765,22 @@
"EDA: (\n",
"\twin: 1h : [\n",
"\t\tFeatureDescriptor - func: FuncWrapper(mean, ['mean'], {}) stride: ['30s'],\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(skew, ['skew'], {}) stride: ['15s'],\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(kurtosis, ['kurtosis'], {}) stride: ['30s'],\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(sum, ['area'], {}) stride: ['30s'],\n",
- "\t]\n",
- "\twin: 30s : [\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(std, ['std'], {}) stride: ['15s'],\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(var, ['var'], {}) stride: ['15s'],\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(quantile, ['quantile_0.25', 'quantile_0.5', 'quantile_0.75'], {'q': [0.25, 0.5, 0.75]}) stride: ['30s'],\n",
"\t]\n",
"\twin: 1m30s : [\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(std, ['std'], {}) stride: ['30s'],\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(var, ['var'], {}) stride: ['30s'],\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(kurtosis, ['kurtosis'], {}) stride: ['15s'],\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(slope, ['slope'], {}) stride: ['15s'],\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(sum, ['area'], {}) stride: ['15s'],\n",
+ "\t]\n",
+ "\twin: 2m : [\n",
"\t\tFeatureDescriptor - func: FuncWrapper(amax, ['amax'], {}) stride: ['15s'],\n",
"\t\tFeatureDescriptor - func: FuncWrapper(amin, ['amin'], {}) stride: ['15s'],\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(, ['rms'], {}) stride: ['30s'],\n",
"\t]\n",
- "\twin: 2m : [\n",
- "\t\tFeatureDescriptor - func: FuncWrapper(slope, ['slope'], {}) stride: ['15s'],\n",
+ "\twin: 30s : [\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(skew, ['skew'], {}) stride: ['15s'],\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(quantile, ['quantile_0.25', 'quantile_0.5', 'quantile_0.75'], {'q': [0.25, 0.5, 0.75]}) stride: ['30s'],\n",
+ "\t\tFeatureDescriptor - func: FuncWrapper(, ['rms'], {}) stride: ['15s'],\n",
"\t]\n",
")\n",
"TMP: (\n",
@@ -1219,16 +1823,16 @@
" \n",
" \n",
" | \n",
- " EDA__amax__w=1m30s | \n",
- " EDA__amin__w=1m30s | \n",
- " EDA__area__w=1h | \n",
- " EDA__kurtosis__w=1h | \n",
+ " EDA__amax__w=2m | \n",
+ " EDA__amin__w=2m | \n",
+ " EDA__area__w=1m30s | \n",
+ " EDA__kurtosis__w=1m30s | \n",
" EDA__mean__w=1h | \n",
" EDA__quantile_0.25__w=30s | \n",
" EDA__quantile_0.5__w=30s | \n",
" EDA__quantile_0.75__w=30s | \n",
- " EDA__rms__w=1m30s | \n",
- " EDA__skew__w=1h | \n",
+ " EDA__rms__w=30s | \n",
+ " EDA__skew__w=30s | \n",
" ... | \n",
" TMP__kurtosis__w=1m | \n",
" TMP__mean__w=1m | \n",
@@ -1268,52 +1872,52 @@
"
\n",
" \n",
" \n",
- " | 2017-06-13 15:19:13+02:00 | \n",
- " 1.272983 | \n",
- " 1.141252 | \n",
+ " 2017-06-13 15:39:28+02:00 | \n",
+ " 3.183709 | \n",
+ " 1.596755 | \n",
+ " 921.28009 | \n",
+ " 0.894696 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " 1.150205 | \n",
- " 1.156600 | \n",
- " 1.164273 | \n",
- " 1.191974 | \n",
" NaN | \n",
+ " 2.297914 | \n",
+ " -2.049654 | \n",
" ... | \n",
- " -0.810092 | \n",
- " 34.975834 | \n",
- " 34.950001 | \n",
- " 34.990002 | \n",
- " 34.990002 | \n",
- " 34.975845 | \n",
- " -0.058798 | \n",
- " -0.000345 | \n",
- " 0.028884 | \n",
- " 0.000834 | \n",
- "
\n",
- " \n",
- " | 2017-06-13 15:29:43+02:00 | \n",
- " 1.610625 | \n",
- " 1.311352 | \n",
- " 16156.117188 | \n",
- " -0.311375 | \n",
- " 1.121953 | \n",
- " 1.327978 | \n",
- " 1.352278 | \n",
- " 1.368264 | \n",
- " 1.439992 | \n",
- " 0.933501 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2017-06-13 15:40:13+02:00 | \n",
+ " 2.855021 | \n",
+ " 1.596755 | \n",
+ " 827.42395 | \n",
+ " 1.085358 | \n",
+ " 1.263708 | \n",
+ " 2.137228 | \n",
+ " 2.169202 | \n",
+ " 2.221959 | \n",
+ " 2.179614 | \n",
+ " 0.264285 | \n",
" ... | \n",
- " -0.514506 | \n",
- " 33.743000 | \n",
- " 33.730000 | \n",
- " 33.740000 | \n",
- " 33.750000 | \n",
- " 33.743008 | \n",
- " 0.442661 | \n",
- " 0.000050 | \n",
- " 0.022753 | \n",
- " 0.000518 | \n",
+ " -1.026206 | \n",
+ " 34.577831 | \n",
+ " 34.549999 | \n",
+ " 34.57 | \n",
+ " 34.610001 | \n",
+ " 34.577858 | \n",
+ " 0.256684 | \n",
+ " -0.000537 | \n",
+ " 0.040788 | \n",
+ " 0.001664 | \n",
"
\n",
" \n",
"\n",
@@ -1321,55 +1925,55 @@
""
],
"text/plain": [
- " EDA__amax__w=1m30s EDA__amin__w=1m30s \\\n",
- "timestamp \n",
- "2017-06-13 15:19:13+02:00 1.272983 1.141252 \n",
- "2017-06-13 15:29:43+02:00 1.610625 1.311352 \n",
+ " EDA__amax__w=2m EDA__amin__w=2m \\\n",
+ "timestamp \n",
+ "2017-06-13 15:39:28+02:00 3.183709 1.596755 \n",
+ "2017-06-13 15:40:13+02:00 2.855021 1.596755 \n",
"\n",
- " EDA__area__w=1h EDA__kurtosis__w=1h \\\n",
- "timestamp \n",
- "2017-06-13 15:19:13+02:00 NaN NaN \n",
- "2017-06-13 15:29:43+02:00 16156.117188 -0.311375 \n",
+ " EDA__area__w=1m30s EDA__kurtosis__w=1m30s \\\n",
+ "timestamp \n",
+ "2017-06-13 15:39:28+02:00 921.28009 0.894696 \n",
+ "2017-06-13 15:40:13+02:00 827.42395 1.085358 \n",
"\n",
" EDA__mean__w=1h EDA__quantile_0.25__w=30s \\\n",
"timestamp \n",
- "2017-06-13 15:19:13+02:00 NaN 1.150205 \n",
- "2017-06-13 15:29:43+02:00 1.121953 1.327978 \n",
+ "2017-06-13 15:39:28+02:00 NaN NaN \n",
+ "2017-06-13 15:40:13+02:00 1.263708 2.137228 \n",
"\n",
" EDA__quantile_0.5__w=30s \\\n",
"timestamp \n",
- "2017-06-13 15:19:13+02:00 1.156600 \n",
- "2017-06-13 15:29:43+02:00 1.352278 \n",
+ "2017-06-13 15:39:28+02:00 NaN \n",
+ "2017-06-13 15:40:13+02:00 2.169202 \n",
"\n",
- " EDA__quantile_0.75__w=30s EDA__rms__w=1m30s \\\n",
- "timestamp \n",
- "2017-06-13 15:19:13+02:00 1.164273 1.191974 \n",
- "2017-06-13 15:29:43+02:00 1.368264 1.439992 \n",
+ " EDA__quantile_0.75__w=30s EDA__rms__w=30s \\\n",
+ "timestamp \n",
+ "2017-06-13 15:39:28+02:00 NaN 2.297914 \n",
+ "2017-06-13 15:40:13+02:00 2.221959 2.179614 \n",
"\n",
- " EDA__skew__w=1h ... TMP__kurtosis__w=1m \\\n",
- "timestamp ... \n",
- "2017-06-13 15:19:13+02:00 NaN ... -0.810092 \n",
- "2017-06-13 15:29:43+02:00 0.933501 ... -0.514506 \n",
+ " EDA__skew__w=30s ... TMP__kurtosis__w=1m \\\n",
+ "timestamp ... \n",
+ "2017-06-13 15:39:28+02:00 -2.049654 ... NaN \n",
+ "2017-06-13 15:40:13+02:00 0.264285 ... -1.026206 \n",
"\n",
" TMP__mean__w=1m TMP__quantile_0.25__w=1m \\\n",
"timestamp \n",
- "2017-06-13 15:19:13+02:00 34.975834 34.950001 \n",
- "2017-06-13 15:29:43+02:00 33.743000 33.730000 \n",
+ "2017-06-13 15:39:28+02:00 NaN NaN \n",
+ "2017-06-13 15:40:13+02:00 34.577831 34.549999 \n",
"\n",
" TMP__quantile_0.5__w=1m TMP__quantile_0.75__w=1m \\\n",
"timestamp \n",
- "2017-06-13 15:19:13+02:00 34.990002 34.990002 \n",
- "2017-06-13 15:29:43+02:00 33.740000 33.750000 \n",
+ "2017-06-13 15:39:28+02:00 NaN NaN \n",
+ "2017-06-13 15:40:13+02:00 34.57 34.610001 \n",
"\n",
" TMP__rms__w=1m TMP__skew__w=1m TMP__slope__w=1m \\\n",
"timestamp \n",
- "2017-06-13 15:19:13+02:00 34.975845 -0.058798 -0.000345 \n",
- "2017-06-13 15:29:43+02:00 33.743008 0.442661 0.000050 \n",
+ "2017-06-13 15:39:28+02:00 NaN NaN NaN \n",
+ "2017-06-13 15:40:13+02:00 34.577858 0.256684 -0.000537 \n",
"\n",
" TMP__std__w=1m TMP__var__w=1m \n",
"timestamp \n",
- "2017-06-13 15:19:13+02:00 0.028884 0.000834 \n",
- "2017-06-13 15:29:43+02:00 0.022753 0.000518 \n",
+ "2017-06-13 15:39:28+02:00 NaN NaN \n",
+ "2017-06-13 15:40:13+02:00 0.040788 0.001664 \n",
"\n",
"[2 rows x 26 columns]"
]
@@ -1396,7 +2000,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1404,7 +2007,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1789,7 +2391,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1797,7 +2398,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {
"tags": []
@@ -1816,7 +2416,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1837,7 +2436,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {
"tags": []
@@ -1847,7 +2445,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1865,7 +2462,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {
"tags": []
@@ -1875,7 +2471,6 @@
]
},
{
- "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1973,7 +2568,7 @@
"hash": "bb9f799bd6e477ee22986cb3017f2b22764a696a803a513374640b5de3a6fdad"
},
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -1987,7 +2582,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.6"
+ "version": "3.11.3"
},
"metadata": {
"interpreter": {
diff --git a/tests/test_features_feature_collection.py b/tests/test_features_feature_collection.py
index ea931dc8..5ec526ff 100644
--- a/tests/test_features_feature_collection.py
+++ b/tests/test_features_feature_collection.py
@@ -2326,3 +2326,229 @@ def test_feature_collection_various_timezones_segment_start_idxs():
s_usa, segment_start_idxs=s_none.index[:3].values, n_jobs=0, return_df=True
)
assert np.all(res.values == [])
+
+
+@pytest.mark.parametrize(
+ "test_setup",
+ [
+ # (start_idx, exact_time, stride, window, expected_result)
+ (12, 5, None, None, 15),
+ (10, 5, None, None, 10),
+ (10, 2.5, None, None, 10),
+ (3, 2.5, None, None, 5.0),
+ (7, True, None, None, 7),
+ (10, False, None, 2, 10),
+ (10, False, None, 3, 12),
+ (100, False, 7, 3, 105),
+ (100, False, [7, 9, 11], 3, 693),
+ (100, False, [3.5, 7.0, 9.4], 3, 987),
+ ],
+)
+def test_process_non_exact_start_idx_int(test_setup):
+ start_idx, exact_time, stride, window, expected_result = test_setup
+ result = FeatureCollection._process_non_exact_start_idx(
+ start_idx, exact_time, stride, window
+ )
+
+ assert result == expected_result
+
+ assert isinstance(result, (int, float, np.int32, np.int64))
+
+
+@pytest.mark.parametrize(
+ "test_setup",
+ [
+ # (start_idx, exact_time, stride, window, expected_result)
+ (32.5, 12.4, None, None, 37.2),
+ (7.5, 2.5, None, None, 7.5),
+ (7.5, 5, None, None, 10.0),
+ (31.0, 21.29485, None, None, 42.5897),
+ (21.395, True, None, None, 21.395),
+ (21.395, False, 5, 7, 35.0),
+ (21.395, False, None, 20, 40.0),
+ (21.395, False, None, 17.475, 34.95),
+ (21.395, False, [2.5, 3.4, 5.0], 17, 85.0),
+ ],
+)
+def test_process_non_exact_start_idx_float(test_setup):
+ start_idx, exact_time, stride, window, expected_result = test_setup
+ result = FeatureCollection._process_non_exact_start_idx(
+ start_idx, exact_time, stride, window
+ )
+
+ assert result == expected_result
+
+ assert isinstance(result, float)
+
+
+@pytest.mark.parametrize(
+ "test_setup",
+ [
+ # (start_idx, exact_time, stride, window, expected_result)
+ (
+ pd.Timestamp("2019-01-01T09:00:00+0100"),
+ "1D",
+ None,
+ None,
+ pd.Timestamp("2019-01-02T00:00:00+0100"),
+ ),
+ (
+ pd.Timestamp("2019-01-01T09:00:00+0100"),
+ "1h",
+ None,
+ None,
+ pd.Timestamp("2019-01-01T09:00:00+0100"),
+ ),
+ (
+ pd.Timestamp("2019-01-01T10:08:55+0100"),
+ "2h",
+ None,
+ None,
+ pd.Timestamp("2019-01-01T12:00:00+0100"),
+ ),
+ (
+ pd.Timestamp("2019-01-01T09:08:16+0100"),
+ "15min",
+ None,
+ None,
+ pd.Timestamp("2019-01-01T09:15:00+0100"),
+ ),
+ (
+ pd.Timestamp("2019-01-01T09:00:00+0100"),
+ pd.Timedelta(1, "day"),
+ None,
+ None,
+ pd.Timestamp("2019-01-02T00:00:00+0100"),
+ ),
+ (
+ pd.Timestamp("2019-01-01T09:00:00+0100"),
+ pd.Timedelta(1, "hour"),
+ None,
+ None,
+ pd.Timestamp("2019-01-01T09:00:00+0100"),
+ ),
+ (
+ pd.Timestamp("2019-01-01T10:08:55+0100"),
+ pd.Timedelta(2, "hours"),
+ None,
+ None,
+ pd.Timestamp("2019-01-01T12:00:00+0100"),
+ ),
+ (
+ pd.Timestamp("2019-01-01T09:08:16+0100"),
+ pd.Timedelta(15, "minutes"),
+ None,
+ None,
+ pd.Timestamp("2019-01-01T09:15:00+0100"),
+ ),
+ (
+ pd.Timestamp("2019-01-01T10:08:55+0100"),
+ True,
+ None,
+ None,
+ pd.Timestamp("2019-01-01T10:08:55+0100"),
+ ),
+ (
+ pd.Timestamp("2019-01-01T09:00:00+0100"),
+ False,
+ None,
+ pd.Timedelta(1, "hour"),
+ pd.Timestamp("2019-01-01T09:00:00+0100"),
+ ),
+ (
+ pd.Timestamp("2019-01-01T09:00:00+0100"),
+ False,
+ pd.Timedelta(2, "hours"),
+ pd.Timedelta(45, "minutes"),
+ pd.Timestamp("2019-01-01T12:00:00+0100"),
+ ),
+ (
+ pd.Timestamp("2019-01-01T10:08:55+0100"),
+ False,
+ [
+ pd.Timedelta(20, "minutes"),
+ pd.Timedelta(30, "minutes"),
+ pd.Timedelta(60, "minutes"),
+ ],
+ pd.Timedelta(10, "minutes"),
+ pd.Timestamp("2019-01-01T11:00:00+0100"),
+ ),
+ ],
+)
+def test_process_non_exact_start_idx_timestamp(test_setup):
+ start_idx, exact_time, stride, window, expected_result = test_setup
+ result = FeatureCollection._process_non_exact_start_idx(
+ start_idx, exact_time, stride, window
+ )
+
+ assert result == expected_result
+
+ assert isinstance(result, pd.Timestamp)
+
+
+@pytest.mark.parametrize(
+ "test_setup",
+ [
+ # (start_idx, stride)
+ (100, None),
+ (100, [7, 9, 11]),
+ (21.395, None),
+ (21.395, [2.5, 5.0]),
+ (pd.Timestamp("2019-01-01T10:08:55+0100"), None),
+ (
+ pd.Timestamp("2019-01-01T10:08:55+0100"),
+ [
+ pd.Timedelta(20, "minutes"),
+ pd.Timedelta(30, "minutes"),
+ pd.Timedelta(60, "minutes"),
+ ],
+ ),
+ ],
+)
+def test_process_non_exact_start_idx_window_none(test_setup):
+ start_idx, stride = test_setup
+ with pytest.raises(AssertionError, match=r".*window argument is required.*"):
+ FeatureCollection._process_non_exact_start_idx(start_idx, False, stride, None)
+
+
+@pytest.mark.parametrize(
+ "test_setup",
+ [
+ # (start_idx, exact_time, stride, window)
+ (5, False, None, pd.Timedelta(10, "minutes")),
+ (5, False, 7, pd.Timedelta(10, "minutes")),
+ (5, False, 7.95, pd.Timedelta(10, "minutes")),
+ (5, False, pd.Timedelta(20, "minutes"), pd.Timedelta(10, "minutes")),
+ (5.25, False, None, pd.Timedelta(10, "minutes")),
+ (5.25, False, 7, pd.Timedelta(10, "minutes")),
+ (5.25, False, 7.95, pd.Timedelta(10, "minutes")),
+ (5.25, False, pd.Timedelta(20, "minutes"), pd.Timedelta(10, "minutes")),
+ (pd.Timestamp("2019-01-01T10:08:55+0100"), False, None, 5),
+ (pd.Timestamp("2019-01-01T10:08:55+0100"), False, None, 5.75),
+ (pd.Timestamp("2019-01-01T10:08:55+0100"), False, 7, 5),
+ (pd.Timestamp("2019-01-01T10:08:55+0100"), False, 7.95, 5.75),
+ (
+ pd.Timestamp("2019-01-01T10:08:55+0100"),
+ False,
+ pd.Timedelta(10, "minutes"),
+ 5,
+ ),
+ (
+ pd.Timestamp("2019-01-01T10:08:55+0100"),
+ False,
+ 7.95,
+ pd.Timedelta(10, "minutes"),
+ ),
+ ],
+)
+def test_process_non_exact_start_idx_incorrect_types(test_setup):
+ start_idx, exact_time, stride, window = test_setup
+ with pytest.raises((AssertionError, ValueError)):
+ FeatureCollection._process_non_exact_start_idx(
+ start_idx, exact_time, stride, window
+ )
+
+
+def test_process_non_exact_start_idx_unsupported_type():
+ with pytest.raises(TypeError, match=r".*not supported as `exact_time` argument.*"):
+ FeatureCollection._process_non_exact_start_idx(2, dict(), None, None)
diff --git a/tsflex/features/feature_collection.py b/tsflex/features/feature_collection.py
index b4ed870a..b31f31be 100644
--- a/tsflex/features/feature_collection.py
+++ b/tsflex/features/feature_collection.py
@@ -12,10 +12,12 @@
__author__ = "Jonas Van Der Donckt, Emiel Deprost, Jeroen Van Der Donckt"
+import math
import os
import traceback
import uuid
from copy import deepcopy
+from datetime import datetime
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
@@ -26,7 +28,7 @@
from tqdm.auto import tqdm
from ..features.function_wrapper import FuncWrapper
-from ..utils.attribute_parsing import AttributeParser
+from ..utils.attribute_parsing import AttributeParser, DataType
from ..utils.data import flatten, to_list, to_series_list
from ..utils.logging import add_logging_handler, delete_logging_handlers
from ..utils.time import parse_time_arg, timedelta_to_str
@@ -275,6 +277,7 @@ def _stroll_feat_generator(
segment_start_idxs: Union[np.ndarray, None],
segment_end_idxs: Union[np.ndarray, None],
start_idx: Any,
+ exact_time,
end_idx: Any,
window_idx: str,
include_final_window: bool,
@@ -297,6 +300,11 @@ def get_stroll_function(idx) -> Tuple[StridedRolling, FuncWrapper]:
]
stride = feature.stride if calc_stride is None else calc_stride
function: FuncWrapper = feature.function
+
+ cleaned_start_idx = self._process_non_exact_start_idx(
+ start_idx, exact_time, calc_stride, win
+ )
+
# The factory method will instantiate the right StridedRolling object
stroll_arg_dict = dict(
data=[series_dict[k] for k in key],
@@ -304,7 +312,7 @@ def get_stroll_function(idx) -> Tuple[StridedRolling, FuncWrapper]:
strides=stride,
segment_start_idxs=segment_start_idxs,
segment_end_idxs=segment_end_idxs,
- start_idx=start_idx,
+ start_idx=cleaned_start_idx,
end_idx=end_idx,
window_idx=window_idx,
include_final_window=include_final_window,
@@ -341,6 +349,164 @@ def _process_segment_idxs(
segment_idxs = segment_idxs.squeeze() # remove singleton dimensions
return segment_idxs
+ @staticmethod
+ def _process_non_exact_start_idx(
+ start_idx: Union[pd.Timestamp, float, int],
+ exact_time: Union[bool, str, pd.Timedelta, int, float],
+ stride: Optional[Union[float, str, pd.Timedelta, List, None]] = None,
+ window: Optional[Union[float, str, pd.Timedelta]] = None,
+ ) -> Union[pd.Timestamp, float, int]:
+ """Round start idx according to value of `exact_time` parameter.
+
+ Parameters
+ ----------
+        start_idx : Union[pd.Timestamp, float, int]
+        exact_time : Union[bool, str, pd.Timedelta, int, float]
+            How to perform the start index rounding. This argument supports multiple types:\n
+            * If the type is a `bool`: `True` returns `start_idx` unchanged; `False` uses
+                the least common multiple of the stride(s) and window as rounding resolution.
+            * If the type is a `str`, the string must represent a frequency string indicating
+                the rounding resolution. Hence, the **passed data must have a time-index**.
+            * If the type is a `float` or an `int`, its value is the rounding resolution.
+                Hence, the passed data must be **non time-indexed** (numeric index).
+            * If the type is a `pd.Timedelta`, its value is the rounding resolution as a
+                time delta. The passed data **must have a time-index**.
+            `start_idx` is rounded up (ceiling) to a multiple of this resolution.
+        stride: Union[float, str, pd.Timedelta, List[Union[float, str, pd.Timedelta]], None], optional
+            The stride size. By default None. This argument supports multiple types: \n
+            * If None, the stride of the `FeatureDescriptor` objects will be used.
+            * If the type is a `float` or an `int`, its value represents the series:\n
+                - its stride **range** when a **non time-indexed** series is passed.
+                - the stride in **number of samples**, when a **time-indexed** series
+                is passed (must then be an `int`)
+ * If the stride's type is a `pd.Timedelta`, the stride size represents
+ the stride-time delta. The passed data **must have a time-index**.
+ * If a `str`, it must represent a stride-time-delta-string. Hence, the
+ **passed data must have a time-index**. \n
+ window : Union[float, str, pd.Timedelta], optional
+
+ Returns
+ -------
+ start index rounded to `exact_time`.
+ return value type depends on start_idx type
+
+ .. note::
+ ```md
+ | index datatype | rounding datatype | return datatype | extra info |
+ | :------------- | :---------------- | --------------: | :---------------------------------------------------------------- |
+            | int            | int               | int             | round `index` up to next multiple of `rounding`                   |
+            |                | float             | float           | round `index` up to next multiple of `rounding`                   |
+            |                | bool              | float           | round `index` to LCM of `window` and/or `stride`                  |
+            | float          | int               | float           | round `index` up to next multiple of `rounding`                   |
+            |                | float             | float           | round `index` up to next multiple of `rounding`                   |
+            |                | bool              | float           | round `index` to LCM of `window` and/or `stride`                  |
+            | pd.Timestamp   | str               | pd.Timestamp    | round `index` to resolution of `rounding` (e.g. '10s', '2m', 'H') |
+            |                | bool              | pd.Timestamp    | round `index` to LCM of `window` and/or `stride`                  |
+            |                | pd.Timedelta      | pd.Timestamp    | round `index` up to next multiple of `rounding`.                  |
+ ```
+ """
+
+ def numeric_ceil(index, round_to):
+ int_div = index // round_to
+ ceil_offset = int((index % round_to) != 0) * round_to
+ return (int_div * round_to) + ceil_offset
+
+        def dt_ceil(dt, delta):
+            # Ceil `dt` up to a whole number of `delta` steps, counted from
+            # `datetime.min` localized to the tzinfo of `dt`.
+            # Read the `dt` argument rather than the enclosing `start_idx`, so the
+            # helper does not silently depend on the closure.
+            dt_py = dt.to_pydatetime()
+            delta_py = delta.to_pytimedelta()
+            dt_min = datetime.min.replace(tzinfo=dt.tzinfo)
+            n_steps = math.ceil((dt_py - dt_min) / delta_py)
+            return pd.Timestamp(dt_min + n_steps * delta_py)
+
+ def is_float(x) -> bool:
+ xt = type(x)
+ return isinstance(x, float) or np.issubdtype(xt, np.floating)
+
+ def is_int(x) -> bool:
+ xt = type(x)
+ return isinstance(x, int) or np.issubdtype(xt, np.integer)
+
+ def is_numeric(x) -> bool:
+ return is_float(x) or is_int(x)
+
+ exact_time_type = type(exact_time)
+ if isinstance(exact_time, bool):
+ # default case, exact_time is True by default
+ if exact_time:
+ return start_idx
+ # calculate LCM of window & stride
+ assert (
+ window is not None
+ ), "if exact_time is `False`, then the window argument is required"
+ if stride is None:
+ strides = []
+ elif isinstance(stride, list) or isinstance(stride, np.ndarray):
+ strides = [s for s in stride]
+ else:
+ strides = [stride]
+
+ # as we need the LCM of the whole list, we can just append the window
+ lcm_values = strides + [window]
+
+ arg_dtype = AttributeParser.determine_type(lcm_values)
+
+ # should not happen, but extra check to cover the possibilities
+ assert arg_dtype != DataType.UNDEFINED
+
+ if arg_dtype == DataType.SEQUENCE:
+ idx_dtype = AttributeParser.determine_type(start_idx)
+ assert idx_dtype == DataType.SEQUENCE
+ # numeric LCM
+ if is_int(lcm_values[0]):
+ lcm = np.lcm.reduce(lcm_values)
+ else:
+ # slightly ugly way to perform LCM on floats
+ PRECISION = 5
+ for idx, val in enumerate(lcm_values):
+ rounded_val = np.round(val, decimals=PRECISION)
+ lcm_values[idx] = int(rounded_val * 10**PRECISION)
+ lcm_val = np.lcm.reduce(lcm_values)
+ lcm = lcm_val / 10**PRECISION
+
+ # round start_idx to nearest value of lcm
+ return numeric_ceil(start_idx, lcm)
+
+ else:
+ assert isinstance(start_idx, pd.Timestamp)
+ # transform to timedeltas and use numeric LCM on asm8 nanoseconds
+ for idx, val in enumerate(lcm_values):
+ parsed_time = parse_time_arg(val)
+ lcm_values[idx] = parsed_time.asm8.astype(np.int64)
+
+ lcm_ns = np.lcm.reduce(lcm_values)
+ lcm_timedelta = pd.Timedelta(lcm_ns)
+
+ ceiltime_timestamp = dt_ceil(start_idx, lcm_timedelta)
+ return ceiltime_timestamp
+
+ elif is_float(exact_time):
+ assert is_numeric(start_idx)
+ return numeric_ceil(float(start_idx), exact_time)
+ elif is_int(exact_time):
+ assert is_numeric(start_idx)
+ return numeric_ceil(start_idx, exact_time)
+ elif isinstance(exact_time, str):
+ assert isinstance(start_idx, pd.Timestamp)
+ # should be normal time offset string
+ return start_idx.ceil(exact_time)
+ elif isinstance(exact_time, pd.Timedelta):
+ assert isinstance(start_idx, pd.Timestamp)
+ rounded_timestamp = dt_ceil(start_idx, exact_time)
+ return rounded_timestamp
+ else:
+ raise TypeError(
+ f"type: {exact_time_type} is not supported as `exact_time` argument - {exact_time}"
+ )
+
def calculate(
self,
data: Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]],
@@ -349,6 +515,7 @@ def calculate(
Union[list, np.ndarray, pd.Series, pd.Index]
] = None,
segment_end_idxs: Optional[Union[list, np.ndarray, pd.Series, pd.Index]] = None,
+ exact_time: Optional[Union[bool, str, pd.Timedelta, int, float]] = True,
return_df: Optional[bool] = False,
window_idx: Optional[str] = "end",
include_final_window: Optional[bool] = False,
@@ -419,6 +586,31 @@ def calculate(
As such, the user can create variable-length segmented windows. However,
in such cases, the user should be weary that the feature functions are
invariant to these (potentially variable-length) windows.
+        exact_time: Optional[Union[bool, str, pd.Timedelta, int, float]], optional
+            How to round the start index, by default True. Supports multiple types:\n
+            * If the type is a `bool`: `True` keeps the start index unchanged; `False` uses
+                the least common multiple of the stride(s) and window as rounding resolution.
+            * If the type is a `str`, the string must represent a frequency string indicating
+                the rounding resolution. Hence, the **passed data must have a time-index**.
+            * If the type is a `float` or an `int`, its value is the rounding resolution.
+                Hence, the passed data must be **non time-indexed** (numeric index).
+            * If the type is a `pd.Timedelta`, its value is the rounding resolution as a
+                time delta. The passed data **must have a time-index**.
+            The start index is rounded up (ceiling) to a multiple of this resolution.
+
+ The functioning of this parameter can be described by the following table:
+
+ | index datatype | rounding datatype | return datatype | extra info |
+ | :------------- | :---------------- | --------------: | ----------------------------------------------------------------: |
+            | int            | int               | int             | round `index` up to next multiple of `rounding`                   |
+            |                | float             | float           | round `index` up to next multiple of `rounding`                   |
+            |                | bool              | float           | round `index` to LCM of `window` and/or `stride`                  |
+            | float          | int               | float           | round `index` up to next multiple of `rounding`                   |
+            |                | float             | float           | round `index` up to next multiple of `rounding`                   |
+            |                | bool              | float           | round `index` to LCM of `window` and/or `stride`                  |
+            | pd.Timestamp   | str               | pd.Timestamp    | round `index` to resolution of `rounding` (e.g. '10s', '2m', 'H') |
+            |                | bool              | pd.Timestamp    | round `index` to LCM of `window` and/or `stride`                  |
+            |                | pd.Timedelta      | pd.Timestamp    | round `index` up to next multiple of `rounding`.                  |
return_df : bool, optional
Whether the output needs to be a DataFrame or a list thereof, by default
False. If `True` the output dataframes will be merged to a DataFrame with an
@@ -592,6 +784,7 @@ def calculate(
# Determine the bounds of the series dict items and slice on them
# TODO: is dit wel nodig `hier? want we doen dat ook in de strided rolling
start, end = _determine_bounds(bound_method, list(series_dict.values()))
+
series_dict = {
n: s.loc[
s.index.dtype.type(start) : s.index.dtype.type(end)
@@ -608,6 +801,7 @@ def calculate(
segment_start_idxs=segment_start_idxs,
segment_end_idxs=segment_end_idxs,
start_idx=start,
+ exact_time=exact_time,
end_idx=end,
window_idx=window_idx,
include_final_window=include_final_window,