@@ -613,11 +613,11 @@ def score_estimator(
613613
614614# %%
615615#
616- # Finally, we can compare the two models using a plot of cumulated claims: for
616+ # Finally, we can compare the two models using a plot of cumulative claims: for
617617# each model, the policyholders are ranked from safest to riskiest based on the
618- # model predictions and the fraction of observed total cumulated claims is
619- # plotted on the y axis. This plot is often called the ordered Lorenz curve of
620- # the model.
618+ # model predictions and the cumulative proportion of claim amounts is plotted
619+ # against the cumulative proportion of exposure. This plot is often called
620+ # the ordered Lorenz curve of the model.
621621#
622622# The Gini coefficient (based on the area between the curve and the diagonal)
623623# can be used as a model selection metric to quantify the ability of the model
@@ -627,7 +627,7 @@ def score_estimator(
627627# Gini coefficient is upper bounded by 1.0 but even an oracle model that ranks
628628# the policyholders by the observed claim amounts cannot reach a score of 1.0.
629629#
630- # We observe that both models are able to rank policyholders by risky-ness
630+ # We observe that both models are able to rank policyholders by riskiness
631631# significantly better than chance although they are also both far from the
632632# oracle model due to the natural difficulty of the prediction problem from a
633633# few features: most accidents are not predictable and can be caused by
@@ -653,11 +653,11 @@ def lorenz_curve(y_true, y_pred, exposure):
653653 ranking = np .argsort (y_pred )
654654 ranked_exposure = exposure [ranking ]
655655 ranked_pure_premium = y_true [ranking ]
656- cumulated_claim_amount = np .cumsum (ranked_pure_premium * ranked_exposure )
657- cumulated_claim_amount /= cumulated_claim_amount [- 1 ]
658- cumulated_exposure = np .cumsum (ranked_exposure )
659- cumulated_exposure /= cumulated_exposure [- 1 ]
660- return cumulated_exposure , cumulated_claim_amount
656+ cumulative_claim_amount = np .cumsum (ranked_pure_premium * ranked_exposure )
657+ cumulative_claim_amount /= cumulative_claim_amount [- 1 ]
658+ cumulative_exposure = np .cumsum (ranked_exposure )
659+ cumulative_exposure /= cumulative_exposure [- 1 ]
660+ return cumulative_exposure , cumulative_claim_amount
661661
662662
663663fig , ax = plt .subplots (figsize = (8 , 8 ))
@@ -669,27 +669,30 @@ def lorenz_curve(y_true, y_pred, exposure):
669669 ("Frequency * Severity model" , y_pred_product ),
670670 ("Compound Poisson Gamma" , y_pred_total ),
671671]:
672- ordered_samples , cum_claims = lorenz_curve (
672+ cum_exposure , cum_claims = lorenz_curve (
673673 df_test ["PurePremium" ], y_pred , df_test ["Exposure" ]
674674 )
675- gini = 1 - 2 * auc (ordered_samples , cum_claims )
675+ gini = 1 - 2 * auc (cum_exposure , cum_claims )
676676 label += " (Gini index: {:.3f})" .format (gini )
677- ax .plot (ordered_samples , cum_claims , linestyle = "-" , label = label )
677+ ax .plot (cum_exposure , cum_claims , linestyle = "-" , label = label )
678678
679679# Oracle model: y_pred == y_test
680- ordered_samples , cum_claims = lorenz_curve (
680+ cum_exposure , cum_claims = lorenz_curve (
681681 df_test ["PurePremium" ], df_test ["PurePremium" ], df_test ["Exposure" ]
682682)
683- gini = 1 - 2 * auc (ordered_samples , cum_claims )
683+ gini = 1 - 2 * auc (cum_exposure , cum_claims )
684684label = "Oracle (Gini index: {:.3f})" .format (gini )
685- ax .plot (ordered_samples , cum_claims , linestyle = "-." , color = "gray" , label = label )
685+ ax .plot (cum_exposure , cum_claims , linestyle = "-." , color = "gray" , label = label )
686686
687687# Random baseline
688688ax .plot ([0 , 1 ], [0 , 1 ], linestyle = "--" , color = "black" , label = "Random baseline" )
689689ax .set (
690690 title = "Lorenz Curves" ,
691- xlabel = "Fraction of policyholders\n (ordered by model from safest to riskiest)" ,
692- ylabel = "Fraction of total claim amount" ,
691+ xlabel = (
692+ "Cumulative proportion of exposure\n "
693+ "(ordered by model from safest to riskiest)"
694+ ),
695+ ylabel = "Cumulative proportion of claim amounts" ,
693696)
694697ax .legend (loc = "upper left" )
695698plt .plot ()
0 commit comments