Skip to content

Commit 483a603

Browse files
authored
Merge pull request #77 from uber/get-regression-coefficients
Public method to get regression coefficients
2 parents f692070 + 6eba2a4 commit 483a603

File tree

5 files changed

+172
-12
lines changed

5 files changed

+172
-12
lines changed

orbit/constants/constants.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from collections import namedtuple
12
from enum import Enum
23
import os
34

@@ -129,4 +130,10 @@ class BacktestAnalyzeKeys(Enum):
129130
# Default values applied when the caller supplies no regressor priors
DEFAULT_REGRESSOR_SIGN = '='
DEFAULT_REGRESSOR_BETA = 0
DEFAULT_REGRESSOR_SIGMA = 1.0


# Column names of the DataFrame returned by `get_regression_coefs`;
# a namedtuple so call sites reference columns as attributes.
_CoefficientDfCols = namedtuple(
    'coefficients_df_cols',
    ['REGRESSOR', 'REGRESSOR_SIGN', 'COEFFICIENT']
)
COEFFICIENT_DF_COLS = _CoefficientDfCols('regressor', 'regressor_sign', 'coefficient')

orbit/lgt.py

Lines changed: 93 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
from orbit.constants.constants import (
1111
DEFAULT_REGRESSOR_SIGN,
1212
DEFAULT_REGRESSOR_BETA,
13-
DEFAULT_REGRESSOR_SIGMA
13+
DEFAULT_REGRESSOR_SIGMA,
14+
COEFFICIENT_DF_COLS,
15+
PredictMethod
1416
)
1517
from orbit.exceptions import (
1618
PredictionException,
@@ -362,6 +364,95 @@ def _set_model_param_names(self):
362364
self.model_param_names += [
363365
lgt.RegressionStanSamplingParameters.REGULAR_REGRESSOR_BETA.value]
364366

367+
@staticmethod
368+
def _concat_regression_coefs(pr_beta=None, rr_beta=None):
369+
"""Concatenates regression posterior matrix
370+
371+
In the case that `pr_beta` or `rr_beta` is a 1d tensor, transform to 2d tensor and
372+
concatenate.
373+
374+
Args
375+
----
376+
pr_beta : torch.tensor
377+
postive-value constrainted regression betas
378+
rr_beta : torch.tensor
379+
regular regression betas
380+
381+
Returns
382+
-------
383+
torch.tensor
384+
concatenated 2d tensor of shape (1, len(rr_beta) + len(pr_beta))
385+
386+
"""
387+
regressor_beta = None
388+
if pr_beta is not None and rr_beta is not None:
389+
pr_beta = pr_beta if len(pr_beta.shape) == 2 else pr_beta.reshape(1, -1)
390+
rr_beta = rr_beta if len(rr_beta.shape) == 2 else rr_beta.reshape(1, -1)
391+
regressor_beta = torch.cat((pr_beta, rr_beta), dim=1)
392+
elif pr_beta is not None:
393+
regressor_beta = pr_beta
394+
elif rr_beta is not None:
395+
regressor_beta = rr_beta
396+
397+
return regressor_beta
398+
399+
def get_regression_coefs(self, aggregation_method='mean'):
    """Return a DataFrame of aggregated regression coefficients.

    Args
    ----
    aggregation_method : str
        any PredictMethod except `full`
    """
    valid_methods = {x.value for x in PredictMethod} - {PredictMethod.FULL_SAMPLING.value}

    coef_df = pd.DataFrame()

    # nothing to report when the model was fit without regressors
    if self.num_of_regular_regressors + self.num_of_positive_regressors == 0:
        return coef_df

    if aggregation_method not in valid_methods:
        raise IllegalArgument("aggregation_method must be one of {}".format(valid_methods))

    posteriors = self.aggregated_posteriors.get(aggregation_method)
    pr_beta = posteriors.get(lgt.RegressionStanSamplingParameters.POSITIVE_REGRESSOR_BETA.value)
    rr_beta = posteriors.get(lgt.RegressionStanSamplingParameters.REGULAR_REGRESSOR_BETA.value)

    # `_concat_regression_coefs` operates on torch tensors
    pr_beta = pr_beta if pr_beta is None else torch.from_numpy(pr_beta)
    rr_beta = rr_beta if rr_beta is None else torch.from_numpy(rr_beta)
    regressor_betas = self._concat_regression_coefs(pr_beta, rr_beta)

    # positive regressors come first to match the concatenation order above;
    # note this ordering is not necessarily the same as `self.regressor_cols`,
    # since positive and regular regressors need not be grouped on input
    regressor_cols = self.positive_regressor_col + self.regular_regressor_col
    regressor_signs = (
        ["Positive"] * self.num_of_positive_regressors
        + ["Regular"] * self.num_of_regular_regressors
    )

    coef_df[COEFFICIENT_DF_COLS.REGRESSOR] = regressor_cols
    coef_df[COEFFICIENT_DF_COLS.REGRESSOR_SIGN] = regressor_signs
    coef_df[COEFFICIENT_DF_COLS.COEFFICIENT] = regressor_betas.flatten()

    return coef_df
455+
365456
def _predict(self, df=None, include_error=False, decompose=False):
366457
"""Vectorized version of prediction math"""
367458

@@ -407,16 +498,7 @@ def _predict(self, df=None, include_error=False, decompose=False):
407498
# regression components
408499
pr_beta = model.get(lgt.RegressionStanSamplingParameters.POSITIVE_REGRESSOR_BETA.value)
409500
rr_beta = model.get(lgt.RegressionStanSamplingParameters.REGULAR_REGRESSOR_BETA.value)
410-
regressor_beta = None
411-
if pr_beta is not None and rr_beta is not None:
412-
pr_beta = pr_beta if len(pr_beta.shape) == 2 else pr_beta.reshape(1, -1)
413-
rr_beta = rr_beta if len(rr_beta.shape) == 2 else rr_beta.reshape(1, -1)
414-
regressor_beta = torch.cat((pr_beta, rr_beta), dim=1)
415-
elif pr_beta is not None:
416-
regressor_beta = pr_beta
417-
elif rr_beta is not None:
418-
regressor_beta = rr_beta
419-
501+
regressor_beta = self._concat_regression_coefs(pr_beta, rr_beta)
420502

421503
################################################################
422504
# Prediction Attributes

tests/test_dlt.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
from orbit.dlt import DLT
55
from orbit.exceptions import IllegalArgument
66

7+
from orbit.constants.constants import COEFFICIENT_DF_COLS
8+
79

810
def test_dlt_fit(iclaims_training_data):
911
dlt = DLT(
@@ -269,6 +271,27 @@ def test_dlt_with_regressors_and_forecast(iclaims_training_data):
269271
assert list(predicted_df.columns) == expected_columns
270272

271273

274+
def test_get_regression_coefs(iclaims_training_data):
    regressor_cols = ['trend.unemploy', 'trend.filling', 'trend.job']

    dlt = DLT(
        response_col='claims',
        date_col='week',
        seasonality=52,
        chains=4,
        prediction_percentiles=[5, 95, 30],
        predict_method='full',
        sample_method='mcmc',
        regressor_col=regressor_cols
    )
    dlt.fit(df=iclaims_training_data)

    coef_df = dlt.get_regression_coefs()

    # rows may be reordered (positive regressors first), so compare as sets
    assert set(coef_df[COEFFICIENT_DF_COLS.REGRESSOR]) == set(regressor_cols)
293+
294+
272295
def test_dlt_multiple_fits(m3_monthly_data):
273296

274297
dlt = DLT(response_col='value',

tests/test_lgt.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from orbit.lgt import LGT
66
from orbit.exceptions import IllegalArgument
77

8+
from orbit.constants.constants import COEFFICIENT_DF_COLS
9+
810

911
def test_lgt_fit(iclaims_training_data):
1012
lgt = LGT(
@@ -270,6 +272,32 @@ def test_lgt_with_regressors_and_forecast(iclaims_training_data):
270272
assert list(predicted_df.columns) == expected_columns
271273

272274

275+
def test_get_regression_coefs(iclaims_training_data):
    regressor_cols = ['trend.unemploy', 'trend.filling', 'trend.job']

    lgt = LGT(
        response_col='claims',
        date_col='week',
        seasonality=52,
        chains=4,
        prediction_percentiles=[5, 95, 30],
        predict_method='full',
        sample_method='mcmc',
        regressor_col=regressor_cols,
        regressor_sign=["=", "=", "+"]
    )
    lgt.fit(df=iclaims_training_data)

    coef_df = lgt.get_regression_coefs()

    # rows may be reordered (positive regressors first), so compare as sets
    assert set(coef_df[COEFFICIENT_DF_COLS.REGRESSOR]) == set(regressor_cols)

    # negative case: `full` is not a valid aggregation method
    with pytest.raises(IllegalArgument):
        lgt.get_regression_coefs(aggregation_method='full')
300+
273301
def test_lgt_multiple_fits(m3_monthly_data):
274302

275303
lgt = LGT(response_col='value',

tests/test_pyro.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import pytest
3333
from orbit.lgt import LGT
3434
from orbit.exceptions import IllegalArgument
35+
from orbit.constants.constants import COEFFICIENT_DF_COLS
3536

3637

3738
def test_lgt_pyro_fit(iclaims_training_data):
@@ -113,3 +114,22 @@ def test_lgt_pyro_fit_and_full_predict(iclaims_training_data):
113114

114115
assert predicted_out.shape == expected_shape
115116
assert list(predicted_out.columns) == expected_columns
117+
118+
# todo: fix regression in pyro implementation
119+
# def test_get_regression_coefs(iclaims_training_data):
120+
# regressor_cols = ['trend.unemploy', 'trend.filling', 'trend.job']
121+
#
122+
# lgt = LGT(
123+
# response_col='claims',
124+
# date_col='week',
125+
# seasonality=52,
126+
# chains=4,
127+
# predict_method='mean',
128+
# inference_engine='pyro',
129+
# regressor_col=regressor_cols,
130+
# regressor_sign=["=", "=", "+"]
131+
# )
132+
#
133+
# lgt.fit(df=iclaims_training_data)
134+
# reg_coefs = lgt.get_regression_coefs()
135+
# assert set(reg_coefs[COEFFICIENT_DF_COLS.REGRESSOR]) == set(regressor_cols)

0 commit comments

Comments
 (0)