Skip to content

Commit 15039de

Browse files
committed
correcting an incorrect copy
1 parent 4da4ad3 commit 15039de

File tree

1 file changed

+49
-238
lines changed

1 file changed

+49
-238
lines changed
Lines changed: 49 additions & 238 deletions
Original file line numberDiff line numberDiff line change
@@ -1,239 +1,50 @@
1-
# This Source Code Form is subject to the terms of the Mozilla Public
2-
# License, v. 2.0. If a copy of the MPL was not distributed with this
3-
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
4-
51
import numpy as np
6-
import pandas as pd
7-
8-
from sklearn.preprocessing import OneHotEncoder, StandardScaler, PolynomialFeatures
9-
from sklearn.compose import ColumnTransformer
10-
from chainladder.development.base import DevelopmentBase
11-
from chainladder import options
12-
13-
14-
class DevelopmentML(DevelopmentBase):
    """An Estimator that interfaces with machine learning (ML) tools that
    implement the scikit-learn API.

    The `DevelopmentML` estimator is used to generate ``ldf_`` patterns from
    the data.

    .. versionadded:: 0.8.1


    Parameters
    ----------
    estimator_ml: sklearn Estimator
        Any sklearn compatible regression estimator, including Pipelines.
    y_ml: list or str or sklearn_transformer
        The response column(s) for the machine learning algorithm. It must be
        present within the Triangle.
    autoregressive: iterable of tuples, (autoregressive_col_name, lag, source_col_name)
        The subset of response column(s) to use as lagged features for the
        Time Series aspects of the model. Predictions from one development
        period get used as features in the next development period. Lags
        should be negative integers.
    weight_ml:
        Stored as given; no method of this class reads it.
    fit_incrementals:
        Whether the response variable should be converted to an incremental
        basis for fitting.
    feat_eng: dict
        A dictionary with feature names as keys and, as values, a dictionary
        holding a function (under the key 'func') and its keyword arguments
        (under the key 'kwargs'), e.g.::

            {
                'feature_1': {
                    'func': function_name for feature 1,
                    'kwargs': keyword arguments for the function
                },
                'feature_2': {
                    'func': function_name for feature 2,
                    'kwargs': keyword arguments for the function
                }
            }

        Functions should be written with an input DataFrame named ``df``;
        this is the DataFrame containing origin, development, and valuation
        that will be passed into the function at run time, e.g. this function
        adds 1 to every origin::

            def test_func(df):
                return df['origin'] + 1

    Attributes
    ----------
    estimator_ml: Estimator
        An sklearn-style estimator to predict development patterns
    ldf_: Triangle
        The estimated loss development patterns.
    cdf_: Triangle
        The estimated cumulative development patterns.
    """

    # Months added to ``development`` per projection step, keyed by grain code.
    _DEVELOPMENT_STEP = {'Y': 12, 'Q': 3, 'M': 1, 'S': 6}
    # Divisor/multiplier applied to ordinal origin and valuation ranks.
    # NOTE(review): 'S': 6 looks inconsistent with the other entries (which
    # read as periods per year) -- confirm before changing.
    _GRAIN_SCALE = {'Y': 1, 'Q': 4, 'M': 12, 'S': 6}

    def __init__(self, estimator_ml=None, y_ml=None, autoregressive=False,
                 weight_ml=None, fit_incrementals=True, feat_eng=None):
        self.estimator_ml = estimator_ml
        self.y_ml = y_ml
        self.weight_ml = weight_ml
        self.autoregressive = autoregressive
        self.fit_incrementals = fit_incrementals
        self.feat_eng = feat_eng

    def _get_y_names(self):
        """Return the response column name(s) as a list.

        Resolution order: a falsy ``y_ml`` falls back to the columns recorded
        by ``fit``; a list is used as is; a str is wrapped in a list; an
        object exposing ``_columns`` supplies that attribute; a
        ``ColumnTransformer`` supplies the column spec of its first
        transformer.

        Raises
        ------
        TypeError
            If ``y_ml`` is none of the supported kinds (previously this
            surfaced as an ``UnboundLocalError``).
        """
        y_ml = self.y_ml
        if not y_ml:
            # Covers None as well as empty list/str, matching ``y_ml_``.
            return self._columns
        if isinstance(y_ml, list):
            return y_ml
        if isinstance(y_ml, str):
            return [y_ml]
        if hasattr(y_ml, '_columns'):
            return y_ml._columns
        if isinstance(y_ml, ColumnTransformer):
            return y_ml.transformers[0][-1]
        raise TypeError(
            "y_ml must be a list, a str, a ColumnTransformer or an object "
            f"exposing `_columns`; got {type(y_ml).__name__}."
        )

    @property
    def y_ml_(self):
        """Transformer used to extract the response from the prepared frame.

        A list or str ``y_ml`` (or a falsy one, which uses the default
        columns) is wrapped in a passthrough ``ColumnTransformer``; anything
        else is returned unchanged.
        """
        selector = self.y_ml
        if not selector:
            columns = self._get_y_names()
        elif isinstance(selector, list):
            columns = selector
        elif isinstance(selector, str):
            columns = [selector]
        else:
            return selector
        return ColumnTransformer(
            transformers=[('passthrough', 'passthrough', columns)])

    def _get_triangle_ml(self, df, preds=None):
        """Create the fitted Triangle.

        Starting from the rows of ``df`` selected by the latest-diagonal
        filter, repeatedly steps ``development``/``valuation`` forward by one
        development grain, predicts with ``estimator_ml`` and stacks the
        results with the predictions for ``df`` itself.

        Parameters
        ----------
        df : DataFrame
            Frame produced by ``_prep_X_ml``.
        preds : array-like, optional
            Predictions for ``df``; computed with ``estimator_ml.predict``
            when omitted.

        Returns
        -------
        Triangle
            Triangle built from the stacked predictions, with NaNs dropped.
        """
        from chainladder.core import Triangle

        if preds is None:
            preds = self.estimator_ml.predict(df)
        X_r = [df]
        y_r = [preds]
        dgrain = self._DEVELOPMENT_STEP[self.development_grain_]
        ograin = self._GRAIN_SCALE[self.origin_grain_]
        y_names = self._get_y_names()

        # Rows with the largest combined origin/development position.
        latest_filter = (
            (df['origin'] + 1) * ograin + (df['development'] - dgrain) / dgrain)
        latest_filter = latest_filter == latest_filter.max()
        preds = pd.DataFrame(preds.copy())[latest_filter].values
        out = df.loc[latest_filter].copy()

        dev_lags = df['development'].drop_duplicates().sort_values()
        max_dev = dev_lags.max()
        # One projection step per development lag after the first.
        for _ in dev_lags[1:]:
            out['development'] = out['development'] + dgrain
            out['valuation'] = out['valuation'] + dgrain / 12
            if preds.ndim == 1:
                preds = preds[:, None]
            if self.autoregressive:
                # Feed the previous step's predictions back in as features.
                for num, col in enumerate(self.autoregressive):
                    out[col[0]] = preds[:, num]
            out = out[out['development'] <= max_dev]
            if len(out) == 0:
                continue
            X_r.append(out.copy())
            if self.feat_eng is not None:
                for key, item in self.feat_eng.items():
                    out[key] = item['func'](df=out, **item['kwargs'])
            preds = self.estimator_ml.predict(out)
            y_r.append(preds.copy())

        X_r = pd.concat(X_r, axis=0).reset_index(drop=True)
        # Replace the observed response columns with the predictions.
        X_r = X_r.drop(y_names, axis=1)
        out = pd.concat(
            (X_r, pd.DataFrame(np.concatenate(y_r, 0), columns=y_names)),
            axis=1)
        # Decode the ordinal encodings back to the original labels.
        out['origin'] = out['origin'].map(
            {v: k for k, v in self.origin_encoder_.items()})
        out['valuation'] = out['valuation'].map(
            {v: k for k, v in self.valuation_encoder_.items()})
        return Triangle(
            out, origin='origin', development='valuation',
            index=self._key_labels, columns=y_names,
            cumulative=not self.fit_incrementals).dropna()

    def _prep_X_ml(self, X):
        """Prep Triangle data ahead of the pipeline.

        Converts ``X`` to a long DataFrame, encodes ``origin`` and
        ``valuation`` with the encoders built in ``fit`` and appends any
        ``feat_eng`` columns.
        """
        if self.fit_incrementals:
            X_ = X.cum_to_incr()
        else:
            X_ = X.copy()
        if self.autoregressive:
            for i in self.autoregressive:
                lag = X[i[2]].shift(i[1])
                X_[i[0]] = lag[lag.valuation <= X.valuation_date]
        # NOTE(review): X_ is built above but the frame below is derived from
        # X directly, so X_ never reaches the returned DataFrame -- confirm
        # whether the merge was meant to use X_.
        frame_kwargs = dict(
            keepdims=True, implicit_axis=True, origin_as_datetime=True)
        df_base = X.incr_to_cum().to_frame(
            **frame_kwargs).reset_index().iloc[:, :-1]
        incremental = X.cum_to_incr().to_frame(**frame_kwargs).reset_index()
        df = df_base.merge(
            incremental, how='left', on=list(df_base.columns)).fillna(0)
        df['origin'] = df['origin'].map(self.origin_encoder_)
        df['valuation'] = df['valuation'].map(self.valuation_encoder_)
        if self.feat_eng is not None:
            for key, item in self.feat_eng.items():
                df[key] = item['func'](df=df, **item['kwargs'])
        return df

    def fit(self, X, y=None, sample_weight=None):
        """Fit the model with X.

        Parameters
        ----------
        X : Triangle-like
            Set of LDFs to which the estimator will be applied.
        y : None
            Ignored, use y_ml to set a response variable for the ML algorithm
        sample_weight : None
            Ignored

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        self._columns = list(X.columns)
        self._key_labels = X.key_labels
        self.origin_grain_ = X.origin_grain
        self.development_grain_ = X.development_grain
        # Ordinal encodings: zero-based rank divided by the grain scale.
        self.origin_encoder_ = dict(zip(
            X.origin.to_timestamp(how='s'),
            (pd.Series(X.origin).rank() - 1)
            / self._GRAIN_SCALE[X.origin_grain]))
        val = X.valuation.sort_values().unique()
        self.valuation_encoder_ = dict(zip(
            val,
            (pd.Series(val).rank() - 1)
            / self._GRAIN_SCALE[X.development_grain]))
        df = self._prep_X_ml(X)
        self.df_ = df
        # Fit model
        self.estimator_ml.fit(df, self.y_ml_.fit_transform(df).squeeze())
        self.triangle_ml_ = self._get_triangle_ml(df)
        return self

    @property
    def ldf_(self):
        """Link ratios of the fitted triangle, valued at ``options.ULT_VAL``."""
        ldf = self.triangle_ml_.incr_to_cum().link_ratio
        ldf.valuation_date = pd.to_datetime(options.ULT_VAL)
        return ldf

    def transform(self, X):
        """Attach ML-predicted development patterns to a copy of X.

        Parameters
        ----------
        X : Triangle
            The triangle to be transformed

        Returns
        -------
        X_new : New triangle with transformed attributes.
        """
        X_new = X.copy()
        X_ml = self._prep_X_ml(X)
        y_ml = self.estimator_ml.predict(X_ml)
        triangle_ml = self._get_triangle_ml(X_ml, y_ml)
        backend = "cupy" if X.array_backend == "cupy" else "numpy"
        X_new.ldf_ = triangle_ml.incr_to_cum().link_ratio.set_backend(backend)
        X_new.ldf_.valuation_date = pd.to_datetime(options.ULT_VAL)
        X_new._set_slicers()
        return X_new
2+
import chainladder as cl
3+
import pytest
4+
5+
def test_basic_bz():
    """Fitted coefficients on the 'abc' sample match the stored values
    once rounded to three decimals."""
    triangle = cl.load_sample('abc')
    model = cl.BarnettZehnwirth(formula='C(origin)+C(development)').fit(triangle)
    rounded = np.around(model.coef_.T.values, 3).flatten()
    expected = np.array([
        11.837, 0.179, 0.345, 0.378, 0.405, 0.427, 0.431,
        0.66, 0.963, 1.157, 1.278, 0.251, -0.056, -0.449,
        -0.829, -1.169, -1.508, -1.798, -2.023, -2.238, -2.428,
    ])
    assert np.all(rounded == expected)
11+
12+
def test_multiple_triangle_exception():
    """Fitting on the 'usauto' sample must raise ``ValueError``."""
    triangles = cl.load_sample("usauto")
    formula = 'C(origin)+C(development)'
    with pytest.raises(ValueError):
        cl.BarnettZehnwirth(formula=formula).fit(triangles)
16+
17+
def test_feat_eng_1():
    """Check that a basic engineered feature can be passed in.

    The engineered feature 'testfeat' simply returns the development column,
    so a model using ``development`` and a model using ``testfeat`` should
    yield identical coefficients.
    """
    def passthrough_development(df):
        return df["development"]

    triangle = cl.load_sample('abc')
    engineered = {'testfeat': {'func': passthrough_development, 'kwargs': {}}}

    baseline = cl.BarnettZehnwirth(
        formula='C(origin)+development+valuation'
    ).fit(triangle)
    with_feature = cl.BarnettZehnwirth(
        formula='C(origin)+testfeat+valuation', feat_eng=engineered
    ).fit(triangle)

    baseline_coefs = np.around(baseline.coef_.T.values, 3)
    feature_coefs = np.around(with_feature.coef_.T.values, 3)
    assert np.all(baseline_coefs == feature_coefs)
31+
32+
def test_feat_eng_2():
    """Check more complex feature engineering.

    ``origin_onehot`` hand-builds indicator columns for origin, so a model
    fed those indicators should yield the same ``ldf_`` as one using
    ``C(origin)`` directly.

    The reference model is produced with ``fit_transform``, so the
    BarnettZehnwirth transformer is exercised as well.
    """
    def origin_onehot(df, ori):
        return [1 if x == ori else 0 for x in df["origin"]]

    abc = cl.load_sample('abc')
    feat_dict = {
        f'origin_{x}': {'func': origin_onehot, 'kwargs': {'ori': float(x + 1)}}
        for x in range(10)
    }
    onehot_formula = '+'.join(f'C({name})' for name in feat_dict)

    engineered = cl.BarnettZehnwirth(
        formula=onehot_formula, feat_eng=feat_dict
    ).fit(abc)
    reference = cl.BarnettZehnwirth(formula='C(origin)').fit_transform(abc)

    # The original repeated this identical comparison twice, refitting both
    # models for no extra coverage; one assertion is sufficient.
    # NOTE(review): the duplicate may have been intended to run
    # ``fit_transform`` on the engineered model too -- confirm with the author.
    assert np.all(
        np.around(engineered.ldf_.values, 3)
        == np.around(reference.ldf_.values, 3)
    )

0 commit comments

Comments
 (0)