@@ -33,9 +33,14 @@ class DevelopmentML(DevelopmentBase):
3333 Time Series aspects of the model. Predictions from one development period
3434 get used as features in the next development period. Lags should be negative
3535 integers.
36+ weighted_step: str or list of str (default = None)
37+ Step name(s) within estimator_ml that receive the sample weights
38+ drop: tuple or list of tuples
39+ Drops specific origin/development combination(s)
40+ drop_valuation: str or list of str (default = None)
41+ Drops specific valuation periods. str must be date convertible.
3642 fit_incrementals:
37- Whether the response variable should be converted to an incremental basis
38- for fitting.
43+ Whether the response variable should be converted to an incremental basis for fitting.
3944
4045 Attributes
4146 ----------
@@ -48,11 +53,13 @@ class DevelopmentML(DevelopmentBase):
4853 """
4954
5055 def __init__ (self , estimator_ml = None , y_ml = None , autoregressive = False ,
51- weight_ml = None , fit_incrementals = True ):
56+ weighted_step = None ,drop = None , drop_valuation = None , fit_incrementals = True ):
5257 self .estimator_ml = estimator_ml
5358 self .y_ml = y_ml
54- self .weight_ml = weight_ml
55- self .autoregressive = autoregressive
59+ self .weighted_step = weighted_step
60+ self .autoregressive = autoregressive
61+ self .drop = drop
62+ self .drop_valuation = drop_valuation
5663 self .fit_incrementals = fit_incrementals
5764
5865 def _get_y_names (self ):
@@ -124,7 +131,7 @@ def _get_triangle_ml(self, df, preds=None):
124131 return Triangle (
125132 out , origin = 'origin' , development = 'valuation' ,
126133 index = self ._key_labels , columns = self ._get_y_names (),
127- cumulative = not self .fit_incrementals ).dropna ()
134+ cumulative = not self .fit_incrementals ).dropna (), out
128135
129136 def _prep_X_ml (self , X ):
130137 """ Preps Triangle data ahead of the pipeline """
@@ -139,14 +146,25 @@ def _prep_X_ml(self, X):
139146 df_base = X .incr_to_cum ().to_frame (
140147 keepdims = True , implicit_axis = True , origin_as_datetime = True
141148 ).reset_index ().iloc [:, :- 1 ]
142- df = df_base .merge (X . cum_to_incr () .to_frame (
149+ df = df_base .merge (X_ .to_frame (
143150 keepdims = True , implicit_axis = True , origin_as_datetime = True
144151 ).reset_index (), how = 'left' ,
145152 on = list (df_base .columns )).fillna (0 )
146153 df ['origin' ] = df ['origin' ].map (self .origin_encoder_ )
147154 df ['valuation' ] = df ['valuation' ].map (self .valuation_encoder_ )
148155 return df
149156
157+ def _prep_w_ml (self ,X ,sample_weight = None ):
158+ weight_base = (~ np .isnan (X .values )).astype (float )
159+ weight = weight_base .copy ()
160+ if self .drop is not None :
161+ weight = weight * self ._drop_func (X )
162+ if self .drop_valuation is not None :
163+ weight = weight * self ._drop_valuation_func (X )
164+ if sample_weight is not None :
165+ weight = weight * sample_weight .values
166+ return weight .flatten ()[weight_base .flatten ()> 0 ]
167+
150168 def fit (self , X , y = None , sample_weight = None ):
151169 """Fit the model with X.
152170
@@ -156,8 +174,8 @@ def fit(self, X, y=None, sample_weight=None):
156174 Set of LDFs to which the estimator will be applied.
157175 y : None
158176 Ignored, use y_ml to set a response variable for the ML algorithm
159- sample_weight : None
160- Ignored
177+ sample_weight : Triangle-like
178+ Weights to use in the regression
161179
162180 Returns
163181 -------
@@ -178,10 +196,18 @@ def fit(self, X, y=None, sample_weight=None):
178196 (pd .Series (val ).rank ()- 1 )/ {'Y' :1 , 'S' : 2 , 'Q' :4 , 'M' : 12 }[X .development_grain ]))
179197 df = self ._prep_X_ml (X )
180198 self .df_ = df
199+ weight = self ._prep_w_ml (X ,sample_weight )
200+ self .weight_ = weight
201+ if self .weighted_step == None :
202+ sample_weights = {}
203+ elif isinstance (self .weighted_step , list ):
204+ sample_weights = {x + '__sample_weight' :weight for x in self .weighted_step }
205+ else :
206+ sample_weights = {self .weighted_step + '__sample_weight' :weight }
181207 # Fit model
182- self .estimator_ml .fit (df , self .y_ml_ .fit_transform (df ).squeeze ())
208+ self .estimator_ml .fit (df , self .y_ml_ .fit_transform (df ).squeeze (), ** sample_weights )
183209 #return selffit_incrementals
184- self .triangle_ml_ = self ._get_triangle_ml (df )
210+ self .triangle_ml_ , self . predicted_data_ = self ._get_triangle_ml (df )
185211 return self
186212
187213 @property
@@ -206,9 +232,10 @@ def transform(self, X):
206232 X_new = X .copy ()
207233 X_ml = self ._prep_X_ml (X )
208234 y_ml = self .estimator_ml .predict (X_ml )
209- triangle_ml = self ._get_triangle_ml (X_ml , y_ml )
235+ triangle_ml , predicted_data = self ._get_triangle_ml (X_ml , y_ml )
210236 backend = "cupy" if X .array_backend == "cupy" else "numpy"
211237 X_new .ldf_ = triangle_ml .incr_to_cum ().link_ratio .set_backend (backend )
212238 X_new .ldf_ .valuation_date = pd .to_datetime (options .ULT_VAL )
213239 X_new ._set_slicers ()
214- return X_new
240+ X_new .predicted_data_ = predicted_data
241+ return X_new
0 commit comments