1- import matplotlib .pyplot as plt
21import numpy as np
3- from sklearn .utils .validation import check_consistent_length
42import warnings
3+ import matplotlib .pyplot as plt
4+ from sklearn .utils .validation import check_consistent_length
55from ..metrics import uplift_curve , auuc , qini_curve , auqc , response_rate_by_percentile , treatment_balance_curve
66
77
@@ -20,7 +20,7 @@ def plot_uplift_preds(trmnt_preds, ctrl_preds, log=False, bins=100):
2020 Returns:
2121 Object that stores computed values.
2222 """
23- # ToDo : Add k as parameter: vertical line on plots
23+ # TODO : Add k as parameter: vertical line on plots
2424 check_consistent_length (trmnt_preds , ctrl_preds )
2525
2626 if not isinstance (bins , int ) or bins <= 0 :
@@ -112,78 +112,116 @@ def plot_uplift_qini_curves(y_true, uplift, treatment, random=True, perfect=Fals
112112 return axes
113113
114114
115- def plot_uplift_by_percentile (y_true , uplift , treatment , strategy , bins = 10 ):
116- """Plot Uplift score at each percentile,
117- Treatment response rate (target mean in the treatment group)
118- and Control response rate (target mean in the control group) at each percentile.
119-
115+ def plot_uplift_by_percentile (y_true , uplift , treatment , strategy , kind = 'line' , bins = 10 ):
116+ """Plot uplift score, treatment response rate and control response rate at each percentile.
117+
118+ Treatment response rate is the target mean in the treatment group.
119+ Control response rate is the target mean in the control group.
120+ Uplift score is the difference between treatment response rate and control response rate.
121+
120122 Args:
121123 y_true (1d array-like): Correct (true) target values.
122124 uplift (1d array-like): Predicted uplift, as returned by a model.
123125 treatment (1d array-like): Treatment labels.
124- strategy (string, ['overall', 'by_group']): Determines the calculating strategy. Defaults to 'first'.
126+ strategy (string, ['overall', 'by_group']): Determines the calculating strategy.
127+
125128 * ``'overall'``:
126129 The first step is taking the first k observations of all test data ordered by uplift prediction
127130 (overall both groups - control and treatment) and conversions in treatment and control groups
128131 calculated only on them. Then the difference between these conversions is calculated.
132+
129133 * ``'by_group'``:
130134 Separately calculates conversions in top k observations in each group (control and treatment)
131- sorted by uplift predictions. Then the difference between these conversions is calculated
132- bins (int): Determines the number of bins (and relative percentile) in the test data.
133-
135+ sorted by uplift predictions. Then the difference between these conversions is calculated.
136+
137+ kind (string, ['line', 'bar']): The type of plot to draw. Default is 'line'.
138+
139+ * ``'line'``:
140+ Generates a line plot.
141+
142+ * ``'bar'``:
143+ Generates a traditional bar-style plot.
144+
145+ bins (int): Determines the number of bins (and the relative percentile) in the test data. Default is 10.
146+
134147 Returns:
135148 Object that stores computed values.
136149 """
137-
150+
138151 strategy_methods = ['overall' , 'by_group' ]
139-
152+ kind_methods = ['line' , 'bar' ]
153+
140154 n_samples = len (y_true )
141155 check_consistent_length (y_true , uplift , treatment )
142-
156+
143157 if strategy not in strategy_methods :
144158 raise ValueError (f'Response rate supports only calculating methods in { strategy_methods } ,'
145159 f' got { strategy } .' )
146-
160+
161+ if kind not in kind_methods :
162+ raise ValueError (f'Function supports only types of plots in { kind_methods } ,'
163+ f' got { kind } .' )
164+
147165 if not isinstance (bins , int ) or bins <= 0 :
148166 raise ValueError (f'Bins should be positive integer. Invalid value bins: { bins } ' )
149167
150168 if bins >= n_samples :
151169 raise ValueError (f'Number of bins = { bins } should be smaller than the length of y_true { n_samples } ' )
152-
153- if bins == 1 :
154- warnings .warn (f'You will get the only one bin of { n_samples } samples'
155- f' which is the length of y_true.'
156- f'\n Please consider using uplift_at_k function instead' ,
157- UserWarning )
158-
170+
159171 rspns_rate_trmnt , var_trmnt = response_rate_by_percentile (y_true , uplift ,
160172 treatment , group = 'treatment' ,
161173 strategy = strategy , bins = bins )
162-
174+
163175 rspns_rate_ctrl , var_ctrl = response_rate_by_percentile (y_true , uplift ,
164176 treatment , group = 'control' ,
165177 strategy = strategy , bins = bins )
166178
167179 uplift_score , uplift_variance = np .subtract (rspns_rate_trmnt , rspns_rate_ctrl ), np .add (var_trmnt , var_ctrl )
168-
180+
169181 percentiles = [p * 100 / bins for p in range (1 , bins + 1 )]
170-
171- _ , axes = plt .subplots (ncols = 1 , nrows = 1 , figsize = (8 , 6 ))
172-
173- axes .errorbar (percentiles , uplift_score , yerr = np .sqrt (uplift_variance ),
174- linewidth = 2 , color = 'red' , label = 'uplift' )
175- axes .errorbar (percentiles , rspns_rate_trmnt , yerr = np .sqrt (var_trmnt ),
176- linewidth = 2 , color = 'forestgreen' , label = 'treatment\n response rate' )
177- axes .errorbar (percentiles , rspns_rate_ctrl , yerr = np .sqrt (var_ctrl ),
178- linewidth = 2 , color = 'orange' , label = 'control\n response rate' )
179- axes .fill_between (percentiles , rspns_rate_ctrl , rspns_rate_trmnt , alpha = 0.1 , color = 'red' )
180-
181- axes .set_xticks (percentiles )
182- axes .legend (loc = 'upper right' )
183- axes .set_title ('Uplift by percentile' )
184- axes .set_xlabel ('Percentile' )
185- axes .set_ylabel ('Uplift = treatment response rate - control response rate' )
186-
182+
183+ if kind == 'line' :
184+ _ , axes = plt .subplots (ncols = 1 , nrows = 1 , figsize = (8 , 6 ))
185+ axes .errorbar (percentiles , uplift_score , yerr = np .sqrt (uplift_variance ),
186+ linewidth = 2 , color = 'red' , label = 'uplift' )
187+ axes .errorbar (percentiles , rspns_rate_trmnt , yerr = np .sqrt (var_trmnt ),
188+ linewidth = 2 , color = 'forestgreen' , label = 'treatment\n response rate' )
189+ axes .errorbar (percentiles , rspns_rate_ctrl , yerr = np .sqrt (var_ctrl ),
190+ linewidth = 2 , color = 'orange' , label = 'control\n response rate' )
191+ axes .fill_between (percentiles , rspns_rate_ctrl , rspns_rate_trmnt , alpha = 0.1 , color = 'red' )
192+
193+ if np .amin (uplift_score ) < 0 :
194+ axes .axhline (y = 0 , color = 'black' , linewidth = 1 )
195+ axes .set_xticks (percentiles )
196+ axes .legend (loc = 'upper right' )
197+ axes .set_title ('Uplift by percentile' )
198+ axes .set_xlabel ('Percentile' )
199+ axes .set_ylabel ('Uplift = treatment response rate - control response rate' )
200+
201+ else : # kind == 'bar'
202+ delta = percentiles [0 ]
203+ fig , axes = plt .subplots (ncols = 1 , nrows = 2 , figsize = (8 , 6 ), sharex = True , sharey = True )
204+ fig .text (0.04 , 0.5 , 'Uplift = treatment response rate - control response rate' ,
205+ va = 'center' , ha = 'center' , rotation = 'vertical' )
206+
207+ axes [0 ].bar (np .array (percentiles ), uplift_score , delta / 1.5 ,
208+ yerr = np .sqrt (uplift_variance ), color = 'red' , label = 'uplift' )
209+ axes [1 ].bar (np .array (percentiles ) - delta / 6 , rspns_rate_trmnt , delta / 3 ,
210+ yerr = np .sqrt (var_trmnt ), color = 'forestgreen' , label = 'treatment\n response rate' )
211+ axes [1 ].bar (np .array (percentiles ) + delta / 6 , rspns_rate_ctrl , delta / 3 ,
212+ yerr = np .sqrt (var_ctrl ), color = 'orange' , label = 'control\n response rate' )
213+
214+ axes [0 ].legend (loc = 'upper right' )
215+ axes [0 ].tick_params (axis = 'x' , bottom = False )
216+ axes [0 ].axhline (y = 0 , color = 'black' , linewidth = 1 )
217+ axes [0 ].set_title ('Uplift by percentile' )
218+
219+ axes [1 ].set_xticks (percentiles )
220+ axes [1 ].legend (loc = 'upper right' )
221+ axes [1 ].axhline (y = 0 , color = 'black' , linewidth = 1 )
222+ axes [1 ].set_xlabel ('Percentile' )
223+ axes [1 ].set_title ('Response rate by percentile' )
224+
187225 return axes
188226
189227
0 commit comments