@@ -33,6 +33,10 @@ class DevelopmentML(DevelopmentBase):
3333 Time Series aspects of the model. Predictions from one development period
3434 get used as features in the next development period. Lags should be negative
3535 integers.
36+ drop: tuple or list of tuples
37+ Drops specific origin/development combination(s)
38+ drop_valuation: str or list of str (default = None)
39+ Drops specific valuation periods. str must be date convertible.
3640 feat_eng: dict
3741 A dictionary with feature names as keys and a dictionary of function (with a key of 'func') and keyword arguments (with a key of 'kwargs')
3842 (e.g. {
@@ -66,11 +70,14 @@ def test_func(df)
6670 """
6771
6872 def __init__ (self , estimator_ml = None , y_ml = None , autoregressive = False ,
69- weight_ml = None , fit_incrementals = True , feat_eng = None ):
73+ weight_ml = None , weighted_step = None , drop = None , drop_valuation = None , fit_incrementals = True , feat_eng = None ):
7074 self .estimator_ml = estimator_ml
7175 self .y_ml = y_ml
7276 self .weight_ml = weight_ml
73- self .autoregressive = autoregressive
77+ self .weighted_step = weighted_step
78+ self .autoregressive = autoregressive
79+ self .drop = drop
80+ self .drop_valuation = drop_valuation
7481 self .fit_incrementals = fit_incrementals
7582 self .feat_eng = feat_eng
7683
@@ -146,7 +153,7 @@ def _get_triangle_ml(self, df, preds=None):
146153 return Triangle (
147154 out , origin = 'origin' , development = 'valuation' ,
148155 index = self ._key_labels , columns = self ._get_y_names (),
149- cumulative = not self .fit_incrementals ).dropna ()
156+ cumulative = not self .fit_incrementals ).dropna (), out
150157
151158 def _prep_X_ml (self , X ):
152159 """ Preps Triangle data ahead of the pipeline """
@@ -170,7 +177,13 @@ def _prep_X_ml(self, X):
170177 if self .feat_eng is not None :
171178 for key , item in self .feat_eng .items ():
172179 df [key ] = item ['func' ](df = df ,** item ['kwargs' ])
173- return df
180+ weight_base = (~ np .isnan (X .values )).astype (float )
181+ weight = weight_base .copy ()
182+ if self .drop is not None :
183+ weight = weight * self ._drop_func (X )
184+ if self .drop_valuation is not None :
185+ weight = weight * self ._drop_valuation_func (X )
186+ return df , weight .flatten ()[weight_base .flatten ()> 0 ]
174187
175188 def fit (self , X , y = None , sample_weight = None ):
176189 """Fit the model with X.
@@ -201,12 +214,19 @@ def fit(self, X, y=None, sample_weight=None):
201214 self .valuation_encoder_ = dict (zip (
202215 val ,
203216 (pd .Series (val ).rank ()- 1 )/ {'Y' :1 , 'S' : 2 , 'Q' :4 , 'M' : 12 }[X .development_grain ]))
204- df = self ._prep_X_ml (X )
217+ df , weight = self ._prep_X_ml (X )
205218 self .df_ = df
219+ self .weight_ = weight
220+ if self .weighted_step == None :
221+ sample_weights = {}
222+ elif isinstance (self .weighted_step , list ):
223+ sample_weights = {x + '__sample_weight' :weight for x in self .weighted_step }
224+ else :
225+ sample_weights = {self .weighted_step + '__sample_weight' :weight }
206226 # Fit model
207- self .estimator_ml .fit (df , self .y_ml_ .fit_transform (df ).squeeze ())
227+ self .estimator_ml .fit (df , self .y_ml_ .fit_transform (df ).squeeze (), ** sample_weights )
208228 #return selffit_incrementals
209- self .triangle_ml_ = self ._get_triangle_ml (df )
229+ self .triangle_ml_ , self . predicted_data_ = self ._get_triangle_ml (df )
210230 return self
211231
212232 @property
@@ -229,11 +249,12 @@ def transform(self, X):
229249 X_new : New triangle with transformed attributes.
230250 """
231251 X_new = X .copy ()
232- X_ml = self ._prep_X_ml (X )
252+ X_ml , weight_ml = self ._prep_X_ml (X )
233253 y_ml = self .estimator_ml .predict (X_ml )
234- triangle_ml = self ._get_triangle_ml (X_ml , y_ml )
254+ triangle_ml , predicted_data = self ._get_triangle_ml (X_ml , y_ml )
235255 backend = "cupy" if X .array_backend == "cupy" else "numpy"
236256 X_new .ldf_ = triangle_ml .incr_to_cum ().link_ratio .set_backend (backend )
237257 X_new .ldf_ .valuation_date = pd .to_datetime (options .ULT_VAL )
238258 X_new ._set_slicers ()
239- return X_new
259+ X_new .predicted_data_ = predicted_data
260+ return X_new
0 commit comments