ResampleWithDistributionTransform doesn't work correctly with the current behaviour of HolidayTransform
Cast columns to numerical types before resampling. It should work like in _SklearnAdapter.
import etna
from etna.commands import mult
from etna.datasets import TSDataset
from etna.datasets.datasets_generation import generate_from_patterns_df
from etna.loggers import tslogger
from etna.metrics import MAE
from etna.metrics import MSE
from etna.metrics import SMAPE
from etna.metrics import MedAE
from etna.metrics import Sign
from etna.pipeline import Pipeline
from hydra_slayer import get_from_params
from omegaconf import OmegaConf
# Number of timestamps to generate for each supported frequency alias.
periods_x_freq = {
    "D": 300,
    "H": 24 * 300,
    "T": 60 * 24 * 10,
    "MS": 50,
    # Every weekly alias uses the same series length.
    **dict.fromkeys(("W-MON", "W-SUN", "W"), 100),
}
# Frequency of the synthetic dataset; it also selects the series length.
freq = "H"

# One repeating value pattern per generated series.
patterns = [
    list(range(10, 101, 10)),
    list(range(100, 0, -10)),
    [20, 40, 40, 50],
]
patterns_df = generate_from_patterns_df(
    patterns=patterns,
    periods=periods_x_freq[freq],
    start_time="1990-01-01",
    freq=freq,
)

# Convert the generated frame with ``TSDataset.to_dataset`` and wrap the
# result in a ``TSDataset`` that carries the same frequency.
ts = TSDataset(TSDataset.to_dataset(patterns_df), freq=freq)
# Declarative pipeline description; each ``_target_`` names the class that is
# instantiated from the surrounding mapping.
model = {
    "_target_": "etna.pipeline.Pipeline",
    "horizon": 2,
    "model": {"_target_": "etna.models.ElasticMultiSegmentModel"},
    "transforms": [
        {
            "_target_": "etna.transforms.TimeSeriesImputerTransform",
            "in_column": "target",
            "strategy": "constant",
        },
        {
            "_target_": "etna.transforms.HolidayTransform",
            "out_column": "holiday_regressor",
        },
        {
            # Resamples the holiday column produced by the previous transform.
            "_target_": "etna.transforms.ResampleWithDistributionTransform",
            "distribution_column": "target",
            "in_column": "holiday_regressor",
        },
        {"_target_": "etna.transforms.SegmentEncoderTransform"},
        {
            "_target_": "etna.transforms.LagTransform",
            "in_column": "target",
            # Lags 1..168 are handed to the ``shift`` resolver together with
            # the top-level ``horizon`` value.
            # NOTE(review): ``shift`` is presumably registered as an OmegaConf
            # resolver by importing ``etna.commands`` - confirm.
            "lags": "${shift:${horizon}," + str(list(range(1, 169))) + "}",
        },
    ],
}
# Resolve the ``${...}`` interpolations and get plain Python containers back.
config = OmegaConf.to_container(OmegaConf.create(model), resolve=True)

# Metrics evaluated on every backtest fold.
metrics = [metric_cls() for metric_cls in (Sign, SMAPE, MAE, MSE, MedAE)]

# Instantiate the pipeline described by the resolved config.
pipeline: Pipeline = get_from_params(**config)

# This is the call that fails in the report: the traceback below ends in a
# TypeError raised while multiplying a column of dtype ``category``.
metrics_df, forecast_df, fold_info_df = pipeline.backtest(ts, metrics=metrics)
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df["fold"] = self._get_folds(df)
Traceback (most recent call last):
File "/workspaces/etna/t.py", line 78, in <module>
metrics_df, forecast_df, fold_info_df = pipeline.backtest(
File "/workspaces/etna/etna/pipeline/base.py", line 966, in backtest
self._folds = self._run_all_folds(
File "/workspaces/etna/etna/pipeline/base.py", line 831, in _run_all_folds
pipelines = parallel(
File "/home/codespace/.cache/pypoetry/virtualenvs/etna-cCDvSR3a-py3.10/lib/python3.10/site-packages/joblib/parallel.py", line 1085, in __call__
if self.dispatch_one_batch(iterator):
File "/home/codespace/.cache/pypoetry/virtualenvs/etna-cCDvSR3a-py3.10/lib/python3.10/site-packages/joblib/parallel.py", line 901, in dispatch_one_batch
self._dispatch(tasks)
File "/home/codespace/.cache/pypoetry/virtualenvs/etna-cCDvSR3a-py3.10/lib/python3.10/site-packages/joblib/parallel.py", line 819, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/home/codespace/.cache/pypoetry/virtualenvs/etna-cCDvSR3a-py3.10/lib/python3.10/site-packages/joblib/_parallel_backends.py", line 208, in apply_async
result = ImmediateResult(func)
File "/home/codespace/.cache/pypoetry/virtualenvs/etna-cCDvSR3a-py3.10/lib/python3.10/site-packages/joblib/_parallel_backends.py", line 597, in __init__
self.results = batch()
File "/home/codespace/.cache/pypoetry/virtualenvs/etna-cCDvSR3a-py3.10/lib/python3.10/site-packages/joblib/parallel.py", line 288, in __call__
return [func(*args, **kwargs)
File "/home/codespace/.cache/pypoetry/virtualenvs/etna-cCDvSR3a-py3.10/lib/python3.10/site-packages/joblib/parallel.py", line 288, in <listcomp>
return [func(*args, **kwargs)
File "/workspaces/etna/etna/pipeline/base.py", line 678, in _fit_backtest_pipeline
pipeline.fit(ts=ts)
File "/workspaces/etna/etna/pipeline/pipeline.py", line 56, in fit
self.ts.fit_transform(self.transforms)
File "/workspaces/etna/etna/datasets/tsdataset.py", line 200, in fit_transform
transform.fit_transform(self)
File "/workspaces/etna/etna/transforms/base.py", line 145, in fit_transform
return self.fit(ts=ts).transform(ts=ts)
File "/workspaces/etna/etna/transforms/base.py", line 126, in transform
df_transformed = self._transform(df=df)
File "/workspaces/etna/etna/transforms/base.py", line 366, in _transform
seg_df = segment_transform.transform(df[segment])
File "/workspaces/etna/etna/transforms/missing_values/resample.py", line 101, in transform
df[self.out_column] = df[self.in_column].ffill() * df["distribution"]
File "/home/codespace/.cache/pypoetry/virtualenvs/etna-cCDvSR3a-py3.10/lib/python3.10/site-packages/pandas/core/ops/common.py", line 72, in new_method
return method(self, other)
File "/home/codespace/.cache/pypoetry/virtualenvs/etna-cCDvSR3a-py3.10/lib/python3.10/site-packages/pandas/core/arraylike.py", line 118, in __mul__
return self._arith_method(other, operator.mul)
File "/home/codespace/.cache/pypoetry/virtualenvs/etna-cCDvSR3a-py3.10/lib/python3.10/site-packages/pandas/core/series.py", line 6259, in _arith_method
return base.IndexOpsMixin._arith_method(self, other, op)
File "/home/codespace/.cache/pypoetry/virtualenvs/etna-cCDvSR3a-py3.10/lib/python3.10/site-packages/pandas/core/base.py", line 1325, in _arith_method
result = ops.arithmetic_op(lvalues, rvalues, op)
File "/home/codespace/.cache/pypoetry/virtualenvs/etna-cCDvSR3a-py3.10/lib/python3.10/site-packages/pandas/core/ops/array_ops.py", line 218, in arithmetic_op
res_values = op(left, right)
File "/home/codespace/.cache/pypoetry/virtualenvs/etna-cCDvSR3a-py3.10/lib/python3.10/site-packages/pandas/core/arrays/categorical.py", line 1639, in __array_ufunc__
raise TypeError(
TypeError: Object with dtype category cannot perform the numpy op multiply
🐛 Bug Report
ResampleWithDistributionTransform doesn't work correctly with the current behaviour of HolidayTransform
Expected behavior
Cast columns to numerical types before resampling. It should work like in _SklearnAdapter.
How To Reproduce
Environment
No response
Additional context
Checklist