Skip to content

Commit 33d3d5c

Browse files
rohanfb authored and facebook-github-bot committed
Detector Param Tuning
Summary: Minor changes to support metalearning for detection.

Reviewed By: yangbk560

Differential Revision: D28313594

fbshipit-source-id: 9e21ed88b40ed4a9153508a9548652a0638903b9
1 parent b84c91f commit 33d3d5c

File tree

5 files changed

+113
-47
lines changed

5 files changed

+113
-47
lines changed

kats/detectors/bocpd_model.py

Lines changed: 65 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -9,24 +9,22 @@
99
algorithm as a DetectorModel, to provide a common interface.
1010
"""
1111

12-
from typing import Optional
13-
import pandas as pd
1412
import json
13+
from typing import Optional
1514

16-
from kats.detectors.detector import DetectorModel
17-
15+
import pandas as pd
1816
from kats.consts import TimeSeriesData
19-
2017
from kats.detectors.bocpd import (
2118
BOCPDetector,
2219
BOCPDModelType,
2320
)
24-
21+
from kats.detectors.detector import DetectorModel
2522
from kats.detectors.detector_consts import (
2623
AnomalyResponse,
2724
ConfidenceBand,
2825
)
2926

27+
3028
class BocpdDetectorModel(DetectorModel):
3129
"""Implements the Bayesian Online Changepoint Detection as a DetectorModel.
3230
@@ -43,16 +41,25 @@ class BocpdDetectorModel(DetectorModel):
4341
>>> anom = bocpd_detector.fit_predict(data=level_ts)
4442
"""
4543

46-
def __init__(self, serialized_model: Optional[bytes] = None, slow_drift: bool = False):
44+
def __init__(
45+
self,
46+
serialized_model: Optional[bytes] = None,
47+
slow_drift: bool = False,
48+
threshold: Optional[float] = None,
49+
):
4750
if serialized_model is None:
4851
self.slow_drift = slow_drift
52+
self.threshold = threshold
4953
else:
5054
model_dict = json.loads(serialized_model)
51-
if 'slow_drift' in model_dict:
52-
self.slow_drift = model_dict['slow_drift']
55+
if "slow_drift" in model_dict:
56+
self.slow_drift = model_dict["slow_drift"]
5357
else:
5458
self.slow_drift = slow_drift
55-
59+
if "threshold" in model_dict:
60+
self.threshold = model_dict["threshold"]
61+
else:
62+
self.threshold = threshold
5663

5764
def serialize(self) -> bytes:
5865
"""Returns the serialzed model.
@@ -64,7 +71,7 @@ def serialize(self) -> bytes:
6471
json containing information about serialized model.
6572
"""
6673

67-
model_dict = {'slow_drift': self.slow_drift}
74+
model_dict = {"slow_drift": self.slow_drift}
6875
return json.dumps(model_dict).encode("utf-8")
6976

7077
def _handle_missing_data_extend(
@@ -76,10 +83,7 @@ def _handle_missing_data_extend(
7683
# but we will remove the interpolated data when we
7784
# evaluate, to make sure that the anomaly score is
7885
# the same length as data
79-
original_time_list = (
80-
list(historical_data.time)
81-
+ list(data.time)
82-
)
86+
original_time_list = list(historical_data.time) + list(data.time)
8387

8488
if historical_data.is_data_missing():
8589
historical_data = historical_data.interpolate()
@@ -90,20 +94,27 @@ def _handle_missing_data_extend(
9094

9195
# extend has been done, now remove the interpolated data
9296
data = TimeSeriesData(
93-
pd.DataFrame({
94-
'time':[
95-
historical_data.time.iloc[i] for i in range(len(historical_data))
96-
if historical_data.time.iloc[i] in original_time_list],
97-
'value':[
98-
historical_data.value.iloc[i] for i in range(len(historical_data))
99-
if historical_data.time.iloc[i] in original_time_list]
100-
}),
101-
use_unix_time=True, unix_time_units="s", tz="US/Pacific"
97+
pd.DataFrame(
98+
{
99+
"time": [
100+
historical_data.time.iloc[i]
101+
for i in range(len(historical_data))
102+
if historical_data.time.iloc[i] in original_time_list
103+
],
104+
"value": [
105+
historical_data.value.iloc[i]
106+
for i in range(len(historical_data))
107+
if historical_data.time.iloc[i] in original_time_list
108+
],
109+
}
110+
),
111+
use_unix_time=True,
112+
unix_time_units="s",
113+
tz="US/Pacific",
102114
)
103115

104116
return data
105117

106-
107118
# pyre-fixme[14]: `fit_predict` overrides method defined in `DetectorModel`
108119
# inconsistently.
109120
def fit_predict(
@@ -129,7 +140,7 @@ def fit_predict(
129140
# pyre-fixme[16]: `BocpdDetectorModel` has no attribute `last_N`.
130141
self.last_N = len(data)
131142

132-
#if there is historical data
143+
# if there is historical data
133144
# we prepend it to data, and run
134145
# the detector as if we only saw data
135146
if historical_data is not None:
@@ -138,20 +149,39 @@ def fit_predict(
138149
bocpd_model = BOCPDetector(data=data)
139150

140151
if not self.slow_drift:
141-
_ = bocpd_model.detector(
142-
model=BOCPDModelType.NORMAL_KNOWN_MODEL, choose_priors=True,
143-
agg_cp=True
144-
)
152+
153+
if self.threshold is not None:
154+
_ = bocpd_model.detector(
155+
model=BOCPDModelType.NORMAL_KNOWN_MODEL,
156+
choose_priors=True,
157+
agg_cp=True,
158+
threshold=self.threshold,
159+
)
160+
else:
161+
_ = bocpd_model.detector(
162+
model=BOCPDModelType.NORMAL_KNOWN_MODEL,
163+
choose_priors=True,
164+
agg_cp=True,
165+
)
145166
else:
146-
_ = bocpd_model.detector(
147-
model=BOCPDModelType.TREND_CHANGE_MODEL, choose_priors=False,
148-
agg_cp=True
149-
)
167+
if self.threshold is not None:
168+
_ = bocpd_model.detector(
169+
model=BOCPDModelType.NORMAL_KNOWN_MODEL,
170+
choose_priors=True,
171+
agg_cp=True,
172+
threshold=self.threshold,
173+
)
174+
else:
175+
_ = bocpd_model.detector(
176+
model=BOCPDModelType.TREND_CHANGE_MODEL,
177+
choose_priors=False,
178+
agg_cp=True,
179+
)
150180

151181
change_prob_dict = bocpd_model.get_change_prob()
152182
change_prob = list(change_prob_dict.values())[0]
153183

154-
#construct the object
184+
# construct the object
155185
N = len(data)
156186
default_ts = TimeSeriesData(time=data.time, value=pd.Series(N * [0.0]))
157187
score_ts = TimeSeriesData(time=data.time, value=pd.Series(change_prob))

kats/detectors/changepoint_evaluator.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -416,8 +416,6 @@ def _parse_data(self, df_row: Any):
416416
this_ts = df_row['time_series']
417417
this_anno = df_row['annotation']
418418

419-
print(this_dataset)
420-
421419
this_anno_json_acc = this_anno.replace("'", "\"")
422420
this_anno_dict = json.loads(this_anno_json_acc)
423421

kats/detectors/cusum_model.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import logging
3232
from datetime import datetime
3333
from enum import Enum
34-
from typing import Any, List, Optional
34+
from typing import Any, List, Optional, Union
3535

3636
import numpy as np
3737
import pandas as pd
@@ -49,7 +49,6 @@
4949
CHANGEPOINT_RETENTION = 7 * 24 * 60 * 60 # in seconds
5050
MAX_CHANGEPOINT = 10
5151

52-
5352
def percentage_change(
5453
data: TimeSeriesData, pre_mean: float, **kwargs: Any
5554
) -> TimeSeriesData:
@@ -94,12 +93,18 @@ class CusumScoreFunction(Enum):
9493
percentage_change = "percentage_change"
9594
z_score = "z_score"
9695

97-
96+
# Score Function Constants
9897
SCORE_FUNC_DICT = {
9998
CusumScoreFunction.change.value: change,
10099
CusumScoreFunction.percentage_change.value: percentage_change,
101100
CusumScoreFunction.z_score.value: z_score,
102101
}
102+
DEFAULT_SCORE_FUNCTION = CusumScoreFunction.change
103+
STR_TO_SCORE_FUNC = { # Used for param tuning
104+
"change": CusumScoreFunction.change,
105+
"percentage_change": CusumScoreFunction.percentage_change,
106+
"z_score": CusumScoreFunction.z_score,
107+
}
103108

104109

105110
class CUSUMDetectorModel(DetectorModel):
@@ -139,7 +144,7 @@ def __init__(
139144
magnitude_quantile: float = CUSUM_DEFAULT_ARGS["magnitude_quantile"],
140145
magnitude_ratio: float = CUSUM_DEFAULT_ARGS["magnitude_ratio"],
141146
change_directions: List[str] = CUSUM_DEFAULT_ARGS["change_directions"],
142-
score_func: CusumScoreFunction = CusumScoreFunction.change,
147+
score_func: Union[str, CusumScoreFunction] = DEFAULT_SCORE_FUNCTION,
143148
remove_seasonality: bool = CUSUM_DEFAULT_ARGS["remove_seasonality"],
144149
):
145150
if serialized_model:
@@ -178,8 +183,16 @@ def __init__(
178183
self.magnitude_quantile = magnitude_quantile
179184
self.magnitude_ratio = magnitude_ratio
180185
self.change_directions = change_directions
181-
self.score_func = score_func.value
182186
self.remove_seasonality = remove_seasonality
187+
188+
# We allow score_function to be a str for compatibility with param tuning
189+
if isinstance(score_func, str):
190+
if score_func in STR_TO_SCORE_FUNC:
191+
score_func = STR_TO_SCORE_FUNC[score_func]
192+
else:
193+
score_func = DEFAULT_SCORE_FUNCTION
194+
self.score_func = score_func.value
195+
183196
else:
184197
raise ValueError(
185198
"""

kats/models/metalearner/metalearner_hpt.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,14 @@
4949
],
5050
"numerical_idx": [],
5151
},
52+
"cusum": {
53+
"categorical_idx": ["score_func"],
54+
"numerical_idx": ["delta_std_ratio", "scan_window", "historical_window"],
55+
},
56+
"statsig": {
57+
"categorical_idx": [],
58+
"numerical_idx": ["n_control", "n_test"],
59+
}
5260
}
5361

5462
default_model_networks = {
@@ -78,6 +86,16 @@
7886
"n_hidden_cat_combo": [[5], [5], [2], [3], [5], [5], [5]],
7987
"n_hidden_num": [],
8088
},
89+
"cusum": {
90+
"n_hidden_shared": [20],
91+
"n_hidden_cat_combo": [[3]],
92+
"n_hidden_num": [5, 5, 5],
93+
},
94+
"statsig": {
95+
"n_hidden_shared": [20],
96+
"n_hidden_cat_combo": [],
97+
"n_hidden_num": [5, 5],
98+
},
8199
}
82100

83101

@@ -163,7 +181,7 @@ def __init__(
163181
numerical_idx = default_model_params[default_model]["numerical_idx"]
164182

165183
else:
166-
msg = f"default_model={default_model} is not available! Please choose one from 'prophet', 'arima', 'sarima', 'holtwinters', stlf, 'theta'"
184+
msg = f"default_model={default_model} is not available! Please choose one from 'prophet', 'arima', 'sarima', 'holtwinters', 'stlf', 'theta', 'cusum', 'statsig'"
167185
logging.error(msg)
168186
raise ValueError(msg)
169187

@@ -385,7 +403,7 @@ def _prepare_data(
385403
else None
386404
)
387405
y_num = (
388-
torch.from_numpy(self._target_num[train_idx, :]).float()
406+
torch.from_numpy(self._target_num[train_idx, :].astype('float')).float()
389407
if self.numerical_idx
390408
else None
391409
)
@@ -398,7 +416,7 @@ def _prepare_data(
398416
else None
399417
)
400418
y_num_val = (
401-
torch.from_numpy(self._target_num[val_idx, :]).float()
419+
torch.from_numpy(self._target_num[val_idx, :].astype('float')).float()
402420
if self.numerical_idx
403421
else None
404422
)

kats/models/metalearner/metalearner_modelselect.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,16 @@ class MetaLearnModelSelect:
6060
def __init__(
6161
self, metadata: Optional[List[Dict[str, Any]]] = None, load_model: bool = False
6262
) -> None:
63-
if not load_model and metadata is not None:
63+
if not load_model:
64+
# pyre-fixme[6]: Expected `Sized` for 1st param but got
65+
# `Optional[List[typing.Any]]`.
6466
if len(metadata) <= 30:
65-
msg = f"metadata size should be greater than 30 but receives {len(metadata)}."
67+
msg = "Dataset is too small to train a meta learner!"
68+
logging.error(msg)
69+
raise ValueError(msg)
70+
71+
if metadata is None:
72+
msg = "Missing metadata!"
6673
logging.error(msg)
6774
raise ValueError(msg)
6875

0 commit comments

Comments (0)