Skip to content

Commit 4e176ba

Browse files
davmre and tensorflower-gardener
authored and committed
Add utility to STS to build a default model from a Pandas series or dataframe.
Q: Should this support other model components, like regression against user-provided covariates? A: I think it's best to start simple; we can always add options later. An alternative would be to make it easy to extend the default model, e.g., implement an `add_component` method for `Sum` instances. Q: What should we name this? A: I've been swinging between 'auto' and 'default' prefixes, and we could also try to get at the fact that what we're specifically automating is constructing seasonal components from a list of dates. For example: - `tfp.sts.auto_build_model` - `tfp.sts.build_default_model` - `tfp.sts.default_model` - `tfp.sts.model_from_dates` Thoughts welcome. PiperOrigin-RevId: 383763071
1 parent 5f8765c commit 4e176ba

File tree

3 files changed

+344
-0
lines changed

3 files changed

+344
-0
lines changed

tensorflow_probability/python/sts/BUILD

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ py_library(
2929
srcs_version = "PY3",
3030
deps = [
3131
":decomposition",
32+
":default_model",
3233
":fitting",
3334
":forecast",
3435
":regularization",
@@ -66,6 +67,38 @@ py_test(
6667
],
6768
)
6869

70+
py_library(
71+
name = "default_model",
72+
srcs = ["default_model.py"],
73+
srcs_version = "PY3",
74+
deps = [
75+
":regularization",
76+
":structural_time_series",
77+
# numpy dep,
78+
# tensorflow dep,
79+
"//tensorflow_probability/python/distributions:inverse_gamma",
80+
"//tensorflow_probability/python/distributions:normal",
81+
"//tensorflow_probability/python/experimental/sts_gibbs",
82+
"//tensorflow_probability/python/experimental/util",
83+
"//tensorflow_probability/python/sts/components",
84+
"//tensorflow_probability/python/sts/internal",
85+
],
86+
)
87+
88+
py_test(
89+
name = "default_model_test",
90+
size = "medium",
91+
srcs = ["default_model_test.py"],
92+
deps = [
93+
# absl/testing:parameterized dep,
94+
# numpy dep,
95+
# pandas dep,
96+
# tensorflow dep,
97+
"//tensorflow_probability",
98+
"//tensorflow_probability/python/internal:test_util",
99+
],
100+
)
101+
69102
py_library(
70103
name = "fitting",
71104
srcs = ["fitting.py"],
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
# Copyright 2021 The TensorFlow Probability Authors.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ============================================================================
15+
"""Utilities for automatically building StructuralTimeSeries models."""
16+
17+
import tensorflow.compat.v2 as tf
18+
19+
from tensorflow_probability.python.sts import components as sts_components
20+
from tensorflow_probability.python.sts import structural_time_series
21+
from tensorflow_probability.python.sts.internal import seasonality_util
22+
from tensorflow_probability.python.sts.internal import util as sts_util
23+
24+
__all__ = [
25+
'build_default_model',
26+
]
27+
28+
29+
# TODO(davmre): before exposing publicly, consider simplifying this function
30+
# (e.g., not exposing prior specification args) and/or renaming it to something
31+
# like `auto_build_model`.
32+
def build_default_model(observed_time_series,
                        base_component=sts_components.LocalLinearTrend,
                        observation_noise_scale_prior=None,
                        drift_scale_prior=None,
                        allow_seasonal_effect_drift=True,
                        name=None):
  """Builds a model with seasonality from a Pandas Series or DataFrame.

  Returns a model of the form
  `tfp.sts.Sum([base_component] + seasonal_components)`, where
  `seasonal_components` are automatically selected using the frequency from the
  `DatetimeIndex` of the provided `pd.Series` or `pd.DataFrame`. The index must
  have a set frequency; if it does not, a `ValueError` is raised. Use
  `tfp.sts.regularize_series` to infer a frequency and build a regularly spaced
  series before calling this function.

  Args:
    observed_time_series: Instance of `pd.Series` or `pd.DataFrame` containing
      one or more time series indexed by a `pd.DatetimeIndex`.
    base_component: Optional subclass of `tfp.sts.StructuralTimeSeries`
      specifying the model used for residual variation in the series not
      explained by seasonal or other effects. May also be an *instance* of such
      a class with specific priors set; if a class is given, it is instantiated
      with `observed_time_series` so that its own default priors are used.
      Default value: `tfp.sts.LocalLinearTrend`.
    observation_noise_scale_prior: Optional `tfd.Distribution` instance
      specifying a prior on `observation_noise_scale`. If `None`, a heuristic
      default prior is constructed based on the provided `observed_time_series`.
      Default value: `None`.
    drift_scale_prior: Optional `tfd.Distribution` instance
      specifying a prior on the `drift_scale` parameter of Seasonal components.
      If `None`, a heuristic default prior is constructed based on the provided
      `observed_time_series`.
      Default value: `None`.
    allow_seasonal_effect_drift: optional Python `bool` specifying whether the
      seasonal effects can drift over time.  Setting this to `False`
      removes the `drift_scale` parameter from the model. This is
      mathematically equivalent to `drift_scale_prior = tfd.Deterministic(0.)`,
      but removing drift directly is preferred because it avoids the use of a
      degenerate prior.
      Default value: `True`.
    name: Python `str` name for ops created by this function.
      Default value: `None` (i.e., 'build_default_model').

  Returns:
    model: instance of `tfp.sts.Sum` representing a model for the given data.

  Raises:
    ValueError: if `observed_time_series` has no index with a set frequency
      (`observed_time_series.index.freq` is missing or `None`).

  #### Example

  Consider a series of eleven data points, covering a period of two weeks
  with three missing days.

  ```python
  import pandas as pd
  import tensorflow as tf
  import tensorflow_probability as tfp

  series = pd.Series(
    [100., 27., 92., 66., 51., 126., 113., 95., 48., 20., 59.,],
    index=pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-04',
                          '2020-01-05', '2020-01-06', '2020-01-07',
                          '2020-01-10', '2020-01-11', '2020-01-12',
                          '2020-01-13', '2020-01-14']))
  ```

  Before calling `build_default_model`, we must regularize the series to follow
  a fixed frequency (here, daily observations):

  ```python
  series = tfp.sts.regularize_series(series)
  # len(series) ==> 14
  ```

  The default model will combine a LocalLinearTrend baseline with a Seasonal
  component to capture day-of-week effects. We can then fit this model to our
  observed data. Here we'll use variational inference:

  ```python
  model = tfp.sts.build_default_model(series)
  # len(model.components) == 2

  # Fit the model using variational inference.
  surrogate_posterior = tfp.sts.build_factored_surrogate_posterior(model)
  losses = tfp.vi.fit_surrogate_posterior(
    target_log_prob_fn=model.joint_log_prob(series),
    surrogate_posterior=surrogate_posterior,
    optimizer=tf.optimizers.Adam(0.1),
    num_steps=1000,
    convergence_criterion=(
      tfp.optimizer.convergence_criteria.SuccessiveGradientsAreUncorrelated(
        window_size=20, min_num_steps=50)),
    jit_compile=True)
  parameter_samples = surrogate_posterior.sample(50)
  ```

  Finally, use the fitted parameters to forecast the next week of data:

  ```python
  forecast_dist = tfp.sts.forecast(model,
                                   observed_time_series=series,
                                   parameter_samples=parameter_samples,
                                   num_steps_forecast=7)
  # Strip trailing unit dimension from LinearGaussianStateSpaceModel events.
  forecast_mean = forecast_dist.mean()[..., 0]
  forecast_stddev = forecast_dist.stddev()[..., 0]

  forecast = pd.DataFrame(
      {'mean': forecast_mean,
       'lower_bound': forecast_mean - 2. * forecast_stddev,
       'upper_bound': forecast_mean + 2. * forecast_stddev},
      index=pd.date_range(start=series.index[-1] + series.index.freq,
                          periods=7,
                          freq=series.index.freq))
  ```

  """
  with tf.name_scope(name or 'build_default_model'):
    # Read the frequency first: `observed_time_series` is rebound to the
    # canonicalized series below. The nested `getattr` makes inputs without an
    # `.index` attribute hit the same descriptive `ValueError` instead of an
    # opaque `AttributeError`.
    frequency = getattr(
        getattr(observed_time_series, 'index', None), 'freq', None)
    if frequency is None:
      raise ValueError('Provided series has no set frequency. Consider '
                       'using `tfp.sts.regularize_series` to infer a frequency '
                       'and build a regularly spaced series.')
    observed_time_series = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    if not isinstance(base_component,
                      structural_time_series.StructuralTimeSeries):
      # A class (not an instance) was given: build a component of that type
      # using default priors.
      base_component = base_component(observed_time_series=observed_time_series)

    components = [base_component]
    seasonal_structure = seasonality_util.create_seasonal_structure(
        frequency=frequency,
        num_steps=int(observed_time_series.time_series.shape[-2]))
    # Add one Seasonal component per entry of the seasonal structure, named
    # after its season type.
    for season_type, season in seasonal_structure.items():
      components.append(
          sts_components.Seasonal(num_seasons=season.num,
                                  num_steps_per_season=season.duration,
                                  drift_scale_prior=drift_scale_prior,
                                  allow_drift=allow_seasonal_effect_drift,
                                  observed_time_series=observed_time_series,
                                  name=str(season_type)))
    return sts_components.Sum(
        components,
        observed_time_series=observed_time_series,
        observation_noise_scale_prior=observation_noise_scale_prior)
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
# Copyright 2021 The TensorFlow Probability Authors.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ============================================================================
15+
"""Tests for automatically building StructuralTimeSeries models."""
16+
17+
# Dependency imports
18+
19+
import pandas as pd
20+
import tensorflow.compat.v2 as tf
21+
import tensorflow_probability as tfp
22+
23+
from tensorflow_probability.python.internal import test_util
24+
from tensorflow_probability.python.sts import default_model
25+
26+
tfb = tfp.bijectors
27+
tfd = tfp.distributions
28+
29+
30+
class DefaultModelTests(test_util.TestCase):
  """Tests for `default_model.build_default_model`."""

  def _build_test_series(self, shape, freq, start='2020-01-01 00:00:00'):
    """Builds a random, regularly spaced Pandas series or dataframe.

    Args:
      shape: list of one or two ints. `shape[0]` is the number of timesteps;
        if present, `shape[1]` is the number of columns.
      freq: frequency passed through to `pd.date_range`.
      start: first timestamp of the index, passed through to `pd.date_range`.

    Returns:
      A `pd.Series` if `shape` has a single entry, otherwise a `pd.DataFrame`
      with columns named `series0`, `series1`, ....
    """
    values = self.evaluate(tf.random.stateless_normal(
        shape, seed=test_util.test_seed(sampler_type='stateless')))
    # Honor the caller-supplied `start` rather than a hard-coded date.
    index = pd.date_range(start,
                          periods=shape[0],
                          freq=freq)
    if len(shape) > 1:
      num_columns = shape[1]
      return pd.DataFrame(values,
                          columns=['series{}'.format(i)
                                   for i in range(num_columns)],
                          index=index)
    else:
      return pd.Series(values, index=index)

  def test_has_expected_seasonality(self):
    # Two weeks of hourly data.
    model = default_model.build_default_model(
        self._build_test_series(shape=[168 * 2], freq=pd.DateOffset(hours=1)))

    self.assertIsInstance(model, tfp.sts.Sum)
    self.assertLen(model.components, 3)
    self.assertIsInstance(model.components[0], tfp.sts.LocalLinearTrend)
    self.assertIsInstance(model.components[1], tfp.sts.Seasonal)
    self.assertContainsSubsequence(model.components[1].name, 'HOUR_OF_DAY')
    self.assertIsInstance(model.components[2], tfp.sts.Seasonal)
    self.assertContainsSubsequence(model.components[2].name, 'DAY_OF_WEEK')

  def test_explicit_base_component_and_priors(self):
    series = self._build_test_series(shape=[48], freq=pd.DateOffset(hours=1))
    model = default_model.build_default_model(
        series,
        base_component=tfp.sts.SemiLocalLinearTrend(
            level_scale_prior=tfd.Exponential(5.),
            slope_scale_prior=tfd.Exponential(0.1),
            slope_mean_prior=tfd.Normal(0., 100.),
            constrain_ar_coef_positive=True,
            constrain_ar_coef_stationary=True,
            observed_time_series=series),
        observation_noise_scale_prior=tfd.Exponential(3.),
        drift_scale_prior=tfd.Exponential(1.))
    self.assertLen(model.components, 2)

    # Look up the first model parameter whose name contains `n`.
    param_by_name = lambda n: [p for p in model.parameters if n in p.name][0]
    self.assertAllClose(param_by_name('level_scale').prior.rate, 5.)
    self.assertAllClose(param_by_name('slope_mean').prior.scale, 100.)
    self.assertAllClose(param_by_name('slope_scale').prior.rate, 0.1)
    self.assertAllClose(param_by_name('drift_scale').prior.rate, 1.)
    self.assertAllClose(param_by_name('observation_noise_scale').prior.rate, 3.)

  def test_creates_batch_model_from_multiple_series(self):
    model = default_model.build_default_model(
        self._build_test_series(shape=[48, 3], freq=pd.DateOffset(hours=1)))
    self.assertAllEqual(model.batch_shape, [3])

  def test_docstring_fitting_example(self):
    # Construct a series of eleven data points, covering a period of two weeks
    # with three missing days.
    series = pd.Series(
        [100., 27., 92., 66., 51., 126., 113., 95., 48., 20., 59.,],
        index=pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-04',
                              '2020-01-05', '2020-01-06', '2020-01-07',
                              '2020-01-10', '2020-01-11', '2020-01-12',
                              '2020-01-13', '2020-01-14']))
    series = tfp.sts.regularize_series(series)
    self.assertLen(series, 14)

    # Default model captures day-of-week effects with a LocalLinearTrend
    # baseline.
    model = default_model.build_default_model(series)
    self.assertLen(model.components, 2)

    # Fit the model using variational inference.
    surrogate_posterior = tfp.sts.build_factored_surrogate_posterior(model)
    _ = tfp.vi.fit_surrogate_posterior(
        target_log_prob_fn=model.joint_log_prob(series),
        surrogate_posterior=surrogate_posterior,
        optimizer=tf.optimizers.Adam(0.1),
        num_steps=1000,
        convergence_criterion=(tfp.optimizer.convergence_criteria.
                               SuccessiveGradientsAreUncorrelated(
                                   window_size=15, min_num_steps=50)),
        jit_compile=True)

    # Forecast the next week.
    parameter_samples = surrogate_posterior.sample(50)
    forecast_dist = tfp.sts.forecast(model,
                                     observed_time_series=series,
                                     parameter_samples=parameter_samples,
                                     num_steps_forecast=7)
    # Run the surrogate posterior's variable initializers before evaluating
    # the forecast.
    self.evaluate(
        [v.initializer for v in surrogate_posterior.trainable_variables])
    # Strip trailing unit dimension from LinearGaussianStateSpaceModel events.
    forecast_mean, forecast_stddev = self.evaluate(
        (forecast_dist.mean()[..., 0], forecast_dist.stddev()[..., 0]))

    # Only checks that the docstring's DataFrame construction runs; the
    # result is intentionally discarded.
    pd.DataFrame(
        {'mean': forecast_mean,
         'lower_bound': forecast_mean - 2. * forecast_stddev,
         'upper_bound': forecast_mean + 2. * forecast_stddev},
        index=pd.date_range(start=series.index[-1] + series.index.freq,
                            periods=7,
                            freq=series.index.freq))
134+
135+
136+
if __name__ == '__main__':
137+
tf.test.main()

0 commit comments

Comments
 (0)