forked from casact/chainladder-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_barnzehn.py
More file actions
87 lines (69 loc) · 3.99 KB
/
test_barnzehn.py
File metadata and controls
87 lines (69 loc) · 3.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import numpy as np
import chainladder as cl
import pytest
abc = cl.load_sample('abc')
def test_basic_bz():
assert np.all(
np.around(cl.BarnettZehnwirth(formula='C(origin)+C(development)').fit(abc).coef_.T.values,3).flatten()
== np.array([11.837,0.179,0.345,0.378,0.405,0.427,0.431,0.66,0.963,1.157,1.278,0.251,-0.056,-0.449,-0.829,-1.169,-1.508,-1.798,-2.023,-2.238,-2.428])
)
def test_multiple_triangle_exception():
d = cl.load_sample("usauto")
with pytest.raises(ValueError):
cl.BarnettZehnwirth(formula='C(origin)+C(development)').fit(d)
def test_feat_eng_1():
'''
this function tests the passing in a basic engineered feature. Since test_func just returns development, C(development) and C(teatfeat) should yield identical results
'''
def test_func(df):
return df["development"]
test_dict = {'testfeat':{'func':test_func,'kwargs':{}}}
assert np.all(
np.around(cl.BarnettZehnwirth(formula='C(origin)+development+valuation').fit(abc).coef_.T.values,3)
== np.around(cl.BarnettZehnwirth(formula='C(origin)+testfeat+valuation',feat_eng = test_dict).fit(abc).coef_.T.values,3)
)
def test_feat_eng_2():
'''
this function tests more complex feature engineering. Since origin_onehot just replicates the one-hot encoding that's performed inside sklearn LinearRegression, the two BZ models should yield identical results
this function also tests the BZ transformer
'''
def origin_onehot(df,ori):
return [1 if x == ori else 0 for x in df["origin"]]
feat_dict = {f'origin_{x}':{'func':origin_onehot,'kwargs':{'ori':float(x+1)}} for x in range(10)}
assert np.all(
np.around(cl.BarnettZehnwirth(formula='+'.join([f'C({x})' for x in feat_dict.keys()]),feat_eng = feat_dict).fit(abc).ldf_.values,3)
== np.around(cl.BarnettZehnwirth(formula='C(origin)').fit_transform(abc).ldf_.values,3)
)
def test_drops():
'''
this function tests the passing in a basic drop_valuation
'''
def test_func(df):
return df["development"]
test_dict = {'testfeat':{'func':test_func,'kwargs':{}}}
assert np.all(
np.around(cl.BarnettZehnwirth(formula='C(development)',drop_valuation='1979').fit(abc).triangle_ml_.values,3)
== np.around(cl.BarnettZehnwirth(formula='C(testfeat)',drop = [('1977',36),('1978',24),('1979',12)],feat_eng = test_dict).fit(abc).triangle_ml_.values,3)
)
def test_bz_2008():
'''
this function tests the drop parameter by recreating the example in the 2008 BZ paper, section 4.1
'''
exposure=np.array([[2.2], [2.4], [2.2], [2.0], [1.9], [1.6], [1.6], [1.8], [2.2], [2.5], [2.6]])
abc_adj = abc/exposure
def predictor_bins(df,pbin,axis):
return [int(x >= min(pbin)) for x in df[axis]]
origin_groups = {f'origin_{ori}'.replace('[','').replace(']','').replace(', ',''):{'func':predictor_bins,'kwargs':{'pbin':ori,'axis':'origin'}} for ori in [[2],[3,4],[5,6,7,8,9,10]]}
def trend_piece(df,piece,axis):
pmax = float(max(piece))
increment=min(df[axis][df[axis]>0])
pfirst = piece[0]-increment
return [(x-pfirst)/increment if x in piece else (0 if x<pmax else (pmax-pfirst)/increment) for x in df[axis]]
development_groups = {f'development_{dev}'.replace('[','').replace(']','').replace(', ',''):{'func':trend_piece,'kwargs':{'piece':dev,'axis':'development'}} for dev in [[24],[36],[48,60,72],[84,96],[108,120,132]]}
valuation_groups = {f'valuation_{val}'.replace('[','').replace(']','').replace(', ',''):{'func':trend_piece,'kwargs':{'piece':val,'axis':'valuation'}} for val in [[1,2,3,4,5,6,7],[8],[9,10]]}
abc_dict = {**origin_groups,**development_groups,**valuation_groups}
model=cl.BarnettZehnwirth(formula='+'.join([z for z in abc_dict.keys()]),feat_eng=abc_dict, drop=('1982',72)).fit(abc_adj)
assert np.all(
np.around(model.coef_.values,4).flatten()
== np.array([11.1579,0.1989,0.0703,0.0919,0.1871,-0.3771,-0.4465,-0.3727,-0.3154,0.0432,0.0858,0.1464])
)