@@ -25,9 +25,7 @@ def uplift_curve(y_true, uplift, treatment):
2525 :func:`plot_uplift_qini_curves`: Plot Uplift and Qini curves.
2626 """
2727
28- # ToDo: Добавить проверки на наличие обоих классов в столбце treatment
29- # ToDo: Добавить проверку на наличие обоих классов в y_true для каждого уникального значения из столбца treatment
30-
28+ # TODO: check that the treatment column is binary
3129 y_true , uplift , treatment = np .array (y_true ), np .array (uplift ), np .array (treatment )
3230 desc_score_indices = np .argsort (uplift , kind = "mergesort" )[::- 1 ]
3331 y_true , uplift , treatment = y_true [desc_score_indices ], uplift [desc_score_indices ], treatment [desc_score_indices ]
@@ -79,9 +77,7 @@ def qini_curve(y_true, uplift, treatment):
7977
8078 :func:`plot_uplift_qini_curves`: Plot Uplift and Qini curves.
8179 """
82- # ToDo: Добавить проверки на наличие обоих классов в столбце treatment
83- # ToDo: Добавить проверку на наличие обоих классов в столбце y_true для каждого уникального значения из столбца treatment
84-
80+ # TODO: check that the treatment column is binary
8581 y_true , uplift , treatment = np .array (y_true ), np .array (uplift ), np .array (treatment )
8682
8783 desc_score_indices = np .argsort (uplift , kind = "mergesort" )[::- 1 ]
@@ -127,7 +123,8 @@ def uplift_auc_score(y_true, uplift, treatment):
127123 Returns:
128124 float: Area Under the Uplift Curve.
129125 """
130- # ToDO: Добавить бейзлайн
126+ # ToDO: Add normalization
127+ # ToDO: Add baseline
131128 return auc (* uplift_curve (y_true , uplift , treatment ))
132129
133130
@@ -147,7 +144,6 @@ def auuc(y_true, uplift, treatment):
147144 Metric `auuc` was renamed to :func:`uplift_auc_score`
148145 in version 0.1.0 and will be removed in 0.2.0
149146 """
150- # ToDO: Добавить бейзлайн
151147 warnings .warn (
152148 'Metric `auuc` was renamed to `uplift_auc_score`'
153149 'in version 0.1.0 and will be removed in 0.2.0' ,
@@ -167,7 +163,8 @@ def qini_auc_score(y_true, uplift, treatment):
167163 Returns:
168164 float: Area Under the Qini Curve.
169165 """
170- # ToDO: Добавить бейзлайн
166+ # ToDO: Add normalization
167+ # ToDO: Add baseline
171168 return auc (* qini_curve (y_true , uplift , treatment ))
172169
173170
@@ -187,7 +184,6 @@ def auqc(y_true, uplift, treatment):
187184 Metric `auqc` was renamed to :func:`qini_auc_score`
188185 in version 0.1.0 and will be removed in 0.2.0
189186 """
190- # ToDO: Добавить бейзлайн
191187 warnings .warn (
192188 'Metric `auqc` was renamed to `qini_auc_score`'
193189 'in version 0.1.0 and will be removed in 0.2.0' ,
@@ -259,7 +255,7 @@ def uplift_at_k(y_true, uplift, treatment, strategy, k=0.3):
259255 else :
260256 n_size = k
261257
262- # ToDo: _checker_ there are obervations among two groups among first k
258+ # TODO: check that both groups (treatment and control) have observations among the first k
263259 score_ctrl = y_true [order ][:n_size ][treatment [order ][:n_size ] == 0 ].mean ()
264260 score_trmnt = y_true [order ][:n_size ][treatment [order ][:n_size ] == 1 ].mean ()
265261
@@ -290,83 +286,90 @@ def uplift_at_k(y_true, uplift, treatment, strategy, k=0.3):
290286
291287
def response_rate_by_percentile(y_true, uplift, treatment, group, strategy, bins=10):
    """Compute response rate and its variance at each percentile.

    Response rate is the target mean in the selected group. Observations are
    ordered by descending predicted uplift (stable sort) and split into
    ``bins`` consecutive chunks of near-equal size.

    Args:
        y_true (1d array-like): Correct (true) target values.
        uplift (1d array-like): Predicted uplift, as returned by a model.
        treatment (1d array-like): Treatment labels.
        group (string, ['treatment', 'control']): Group type for computing response rate: treatment or control.

            * ``'treatment'``:
                Values equal 1 in the treatment column.

            * ``'control'``:
                Values equal 0 in the treatment column.

        strategy (string, ['overall', 'by_group']): Determines the calculating strategy.

            * ``'overall'``:
                All observations (both groups) are ordered by uplift prediction and split into bins;
                the response rate of the selected group is then computed inside each bin.

            * ``'by_group'``:
                Only observations of the selected group are kept, ordered by uplift prediction
                and split into bins; the response rate is computed for each bin.

        bins (int): Determines a number of bins (and a relative percentile) in the test data. Default is 10.

    Returns:
        array: Response rate at each percentile for control or treatment group.
        array: Variance of the response rate at each percentile.

        Bins that contain no observations of the selected group get ``nan``
        in both arrays.

    Raises:
        ValueError: If ``group`` or ``strategy`` is not one of the supported values,
            if ``bins`` is not a positive integer, or if ``bins`` is not smaller
            than the length of ``y_true``.
    """

    group_types = ['treatment', 'control']
    strategy_methods = ['overall', 'by_group']

    # Validate the inputs before measuring them, so a malformed input is
    # reported by the dedicated checker rather than by a bare len() failure.
    check_consistent_length(y_true, uplift, treatment)
    n_samples = len(y_true)

    if group not in group_types:
        raise ValueError(f'Response rate supports only group types in {group_types},'
                         f' got {group}.')

    if strategy not in strategy_methods:
        raise ValueError(f'Response rate supports only calculating methods in {strategy_methods},'
                         f' got {strategy}.')

    # NumPy integer scalars (e.g. np.int64) are valid bin counts as well.
    if not isinstance(bins, (int, np.integer)) or bins <= 0:
        raise ValueError(f'Bins should be positive integer. Invalid value bins: {bins}')

    if bins >= n_samples:
        raise ValueError(f'Number of bins = {bins} should be smaller than the length of y_true {n_samples}')

    if bins == 1:
        warnings.warn(f'You will get the only one bin of {n_samples} samples'
                      f' which is the length of y_true.'
                      f'\nPlease consider using uplift_at_k function instead',
                      UserWarning)

    y_true, uplift, treatment = np.array(y_true), np.array(uplift), np.array(treatment)
    # Stable sort reversed: highest predicted uplift first.
    order = np.argsort(uplift, kind='mergesort')[::-1]

    trmnt_flag = 1 if group == 'treatment' else 0

    if strategy == 'overall':
        # Bin everything first, then keep the selected group inside each bin.
        y_true_bin = np.array_split(y_true[order], bins)
        trmnt_bin = np.array_split(treatment[order], bins)
        y_bin = [y[trmnt == trmnt_flag] for y, trmnt in zip(y_true_bin, trmnt_bin)]
    else:  # strategy == 'by_group'
        # Keep the selected group first, then bin what is left.
        y_bin = np.array_split(y_true[order][treatment[order] == trmnt_flag], bins)

    group_size = np.array([len(y) for y in y_bin])
    # An empty bin has no defined mean: return nan explicitly instead of
    # relying on np.mean([]) and its "Mean of empty slice" RuntimeWarning.
    response_rate = np.array([np.mean(y) if len(y) else np.nan for y in y_bin])

    # Binomial variance of a proportion: p * (1 - p) / n (nan where n == 0).
    with np.errstate(divide='ignore', invalid='ignore'):
        variance = np.multiply(response_rate, np.divide((1 - response_rate), group_size))

    return response_rate, variance
370373
371374
372375def treatment_balance_curve (uplift , treatment , winsize ):
0 commit comments