diff --git a/chainladder/development/barnzehn.py b/chainladder/development/barnzehn.py index 243d4f30..783b3645 100644 --- a/chainladder/development/barnzehn.py +++ b/chainladder/development/barnzehn.py @@ -46,13 +46,6 @@ def fit(self, X, y=None, sample_weight=None): raise ValueError("Only single index/column triangles are supported") tri = X.cum_to_incr().log() response = X.columns[0] if not self.response else self.response - # Check for more than one linear predictor - linearpredictors = 0 - for term in ModelDesc.from_formula(self.formula).rhs_termlist[1:]: - if 'C(' not in term.factors[0].code: - linearpredictors += 1 - if linearpredictors > 1: - warnings.warn("Using more than one linear predictor with BarnettZehnwirth may lead to issues with multicollinearity.") self.model_ = DevelopmentML(Pipeline(steps=[ ('design_matrix', PatsyFormula(self.formula)), ('model', LinearRegression(fit_intercept=False))]), diff --git a/chainladder/development/tests/test_barnzehn.py b/chainladder/development/tests/test_barnzehn.py index 8ec0cbb6..e57da81c 100644 --- a/chainladder/development/tests/test_barnzehn.py +++ b/chainladder/development/tests/test_barnzehn.py @@ -1,6 +1,7 @@ import numpy as np import chainladder as cl import pytest +from chainladder.utils.utility_functions import PTF_formula abc = cl.load_sample('abc') def test_basic_bz(): @@ -34,14 +35,13 @@ def test_bz_2008(): exposure=np.array([[2.2], [2.4], [2.2], [2.0], [1.9], [1.6], [1.6], [1.8], [2.2], [2.5], [2.6]]) abc_adj = abc/exposure - origin_buckets = [2,3,5] - dev_buckets = [(24,36),(36,48),(48,84),(84,108),(108,9999)] - val_buckets = [(1,8),(8,9),(9,999)] - - origin_formula = '+'.join([f'I(origin >= {x})' for x in origin_buckets]) - dev_formula = '+'.join([f'I((np.minimum({x[1]-12},development) - np.minimum({x[0]-12},development))/12)' for x in dev_buckets]) - val_formula = '+'.join([f'I(np.minimum({x[1]-1},valuation) - np.minimum({x[0]-1},valuation))' for x in val_buckets]) - model=cl.BarnettZehnwirth(formula=origin_formula + '+' + dev_formula + '+' + val_formula, drop=('1982',72)).fit(abc_adj) + origin_buckets = [(0,1),(2,2),(3,4),(5,10)] + dev_buckets = [(24,36),(36,48),(48,84),(84,108),(108,144)] + val_buckets = [(1,8),(8,9),(9,12)] + + abc_formula = PTF_formula(abc_adj,alpha=origin_buckets,gamma=dev_buckets,iota=val_buckets) + + model=cl.BarnettZehnwirth(formula=abc_formula, drop=('1982',72)).fit(abc_adj) assert np.all( np.around(model.coef_.values,4).flatten() == np.array([11.1579,0.1989,0.0703,0.0919,0.1871,-0.3771,-0.4465,-0.3727,-0.3154,0.0432,0.0858,0.1464]) diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py index 3352f8e5..fea2a552 100644 --- a/chainladder/utils/utility_functions.py +++ b/chainladder/utils/utility_functions.py @@ -767,3 +767,29 @@ def model_diagnostics(model, name=None, groupby=None): out.index = idx triangles.append(out) return concat(triangles, 0) + + +def PTF_formula(tri: Triangle, alpha: ArrayLike = None, gamma: ArrayLike = None, iota: ArrayLike = None): + """ Helper formula that builds a patsy formula string for the BarnettZehnwirth + estimator. Each axis's parameters can be grouped together. Groups of origin + parameters (alpha) are set equal, and are specified by a ranges (inclusive). + Groups of development (gamma) and valuation (iota) parameters are fit to + separate linear trends, specified as tuples denoting ranges with shared endpoints. + In other words, development and valuation trends are fit to a piecewise linear model. + A triangle must be supplied to provide some critical information. + """ + formula_parts=[] + if(alpha): + # The intercept term takes the place of the first alpha + for ind,a in enumerate(alpha): + if(a[0]==0): + alpha=alpha[:ind]+alpha[(ind+1):] + formula_parts += ['+'.join([f'I({x[0]} <= origin)' for x in alpha])] + if(gamma): + dgrain = min(tri.development) + formula_parts += ['+'.join([f'I((np.minimum({x[1]-dgrain},development) - np.minimum({x[0]-dgrain},development))/{dgrain})' for x in gamma])] + if(iota): + formula_parts += ['+'.join([f'I(np.minimum({x[1]-1},valuation) - np.minimum({x[0]-1},valuation))' for x in iota])] + if(formula_parts): + return '+'.join(formula_parts) + return ''