Skip to content

Commit 3a268bd

Browse files
committed
Merge branch 'dev' into feature/fix_docs
2 parents a7d28b3 + e5ca419 commit 3a268bd

File tree

4 files changed

+186
-117
lines changed

4 files changed

+186
-117
lines changed

docs/changelog.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,24 @@
88
* 🔨 something that previously didn’t work as documented – or according to reasonable expectations – should now work.
99
* ❗️ you will need to change your code to have the same effect in the future; or a feature will be removed in the future.
1010

11+
## Version 0.1.2
12+
_in development_
13+
14+
### [sklift.models](https://scikit-uplift.readthedocs.io/en/latest/api/models.html)
15+
16+
* 🔨 Fix bugs in [TwoModels](https://scikit-uplift.readthedocs.io/en/latest/api/models.html#sklift.models.models.TwoModels) for regression problem.
17+
* 📝 Minor code refactoring.
18+
19+
### [sklift.metrics](https://scikit-uplift.readthedocs.io/en/latest/api/metrics.html)
20+
21+
* 📝 Minor code refactoring.
22+
23+
### [sklift.viz](https://scikit-uplift.readthedocs.io/en/latest/api/viz.html)
24+
25+
* 💥 Add bar plot in [plot_uplift_by_percentile](https://scikit-uplift.readthedocs.io/en/latest/api/viz.html#sklift.viz.base.plot_uplift_by_percentile) by [@ElisovaIra](https://github.com/ElisovaIra).
26+
* 🔨 Fix bug in [plot_uplift_by_percentile](https://scikit-uplift.readthedocs.io/en/latest/api/viz.html#sklift.viz.base.plot_uplift_by_percentile).
27+
* 📝 Minor code refactoring.
28+
1129
## Version 0.1.1
1230

1331
### [sklift.viz](https://scikit-uplift.readthedocs.io/en/latest/api/viz.html)

sklift/metrics/metrics.py

Lines changed: 39 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,7 @@ def uplift_curve(y_true, uplift, treatment):
2525
:func:`plot_uplift_qini_curves`: Plot Uplift and Qini curves.
2626
"""
2727

28-
# ToDo: Добавить проверки на наличие обоих классов в столбце treatment
29-
# ToDo: Добавить проверку на наличие обоих классов в y_true для каждого уникального значения из столбца treatment
30-
28+
# TODO: check the treatment is binary
3129
y_true, uplift, treatment = np.array(y_true), np.array(uplift), np.array(treatment)
3230
desc_score_indices = np.argsort(uplift, kind="mergesort")[::-1]
3331
y_true, uplift, treatment = y_true[desc_score_indices], uplift[desc_score_indices], treatment[desc_score_indices]
@@ -79,9 +77,7 @@ def qini_curve(y_true, uplift, treatment):
7977
8078
:func:`plot_uplift_qini_curves`: Plot Uplift and Qini curves.
8179
"""
82-
# ToDo: Добавить проверки на наличие обоих классов в столбце treatment
83-
# ToDo: Добавить проверку на наличие обоих классов в столбце y_true для каждого уникального значения из столбца treatment
84-
80+
# TODO: check the treatment is binary
8581
y_true, uplift, treatment = np.array(y_true), np.array(uplift), np.array(treatment)
8682

8783
desc_score_indices = np.argsort(uplift, kind="mergesort")[::-1]
@@ -127,7 +123,8 @@ def uplift_auc_score(y_true, uplift, treatment):
127123
Returns:
128124
float: Area Under the Uplift Curve.
129125
"""
130-
# ToDO: Добавить бейзлайн
126+
# ToDO: Add normalization
127+
# ToDO: Add baseline
131128
return auc(*uplift_curve(y_true, uplift, treatment))
132129

133130

@@ -147,7 +144,6 @@ def auuc(y_true, uplift, treatment):
147144
Metric `auuc` was renamed to :func:`uplift_auc_score`
148145
in version 0.1.0 and will be removed in 0.2.0
149146
"""
150-
# ToDO: Добавить бейзлайн
151147
warnings.warn(
152148
'Metric `auuc` was renamed to `uplift_auc_score`'
153149
'in version 0.1.0 and will be removed in 0.2.0',
@@ -167,7 +163,8 @@ def qini_auc_score(y_true, uplift, treatment):
167163
Returns:
168164
float: Area Under the Qini Curve.
169165
"""
170-
# ToDO: Добавить бейзлайн
166+
# ToDO: Add normalization
167+
# ToDO: Add baseline
171168
return auc(*qini_curve(y_true, uplift, treatment))
172169

173170

@@ -187,7 +184,6 @@ def auqc(y_true, uplift, treatment):
187184
Metric `auqc` was renamed to :func:`qini_auc_score`
188185
in version 0.1.0 and will be removed in 0.2.0
189186
"""
190-
# ToDO: Добавить бейзлайн
191187
warnings.warn(
192188
'Metric `auqc` was renamed to `qini_auc_score`'
193189
'in version 0.1.0 and will be removed in 0.2.0',
@@ -259,7 +255,7 @@ def uplift_at_k(y_true, uplift, treatment, strategy, k=0.3):
259255
else:
260256
n_size = k
261257

262-
# ToDo: _checker_ there are obervations among two groups among first k
258+
# ToDo: _checker_ there are observations among two groups among first k
263259
score_ctrl = y_true[order][:n_size][treatment[order][:n_size] == 0].mean()
264260
score_trmnt = y_true[order][:n_size][treatment[order][:n_size] == 1].mean()
265261

@@ -290,83 +286,90 @@ def uplift_at_k(y_true, uplift, treatment, strategy, k=0.3):
290286

291287

292288
def response_rate_by_percentile(y_true, uplift, treatment, group, strategy, bins=10):
293-
"""Compute response rate (target mean in the control or treatment group) at each percentile.
294-
289+
"""Compute response rate and its variance at each percentile.
290+
291+
Response rate is a target mean in the group.
292+
295293
Args:
296294
y_true (1d array-like): Correct (true) target values.
297295
uplift (1d array-like): Predicted uplift, as returned by a model.
298296
treatment (1d array-like): Treatment labels.
299297
group (string, ['treatment', 'control']): Group type for computing response rate: treatment or control.
298+
300299
* ``'treatment'``:
301-
Values equal 1 in the treatment column.
300+
Values equal 1 in the treatment column.
301+
302302
* ``'control'``:
303-
Values equal 0 in the treatment column.
304-
strategy (string, ['overall', 'by_group']): Determines the calculating strategy.
303+
Values equal 0 in the treatment column.
304+
305+
strategy (string, ['overall', 'by_group']): Determines the calculating strategy.
306+
305307
* ``'overall'``:
306308
The first step is taking the first k observations of all test data ordered by uplift prediction
307309
(overall both groups - control and treatment) and conversions in treatment and control groups
308310
calculated only on them. Then the difference between these conversions is calculated.
311+
309312
* ``'by_group'``:
310313
Separately calculates conversions in top k observations in each group (control and treatment)
311-
sorted by uplift predictions. Then the difference between these conversions is calculated
312-
bins (int): Determines the number of bins (and relative percentile) in the test data.
313-
314+
sorted by uplift predictions. Then the difference between these conversions is calculated.
315+
316+
bins (int): Determines the number of bins (and the relative percentile) in the test data. Default is 10.
317+
314318
Returns:
315319
array: Response rate at each percentile for control or treatment group
316-
array: Variance of the response rate at each percentile
320+
array: Variance of the response rate at each percentile
317321
"""
318-
322+
319323
group_types = ['treatment', 'control']
320324
strategy_methods = ['overall', 'by_group']
321-
325+
322326
n_samples = len(y_true)
323327
check_consistent_length(y_true, uplift, treatment)
324-
328+
325329
if group not in group_types:
326330
raise ValueError(f'Response rate supports only group types in {group_types},'
327-
f' got {group}.')
331+
f' got {group}.')
328332

329333
if strategy not in strategy_methods:
330334
raise ValueError(f'Response rate supports only calculating methods in {strategy_methods},'
331335
f' got {strategy}.')
332-
336+
333337
if not isinstance(bins, int) or bins <= 0:
334-
raise ValueError(f'bins should be positive integer.'
335-
f' Invalid value bins: {bins}')
336-
338+
raise ValueError(f'Bins should be positive integer. Invalid value bins: {bins}')
339+
337340
if bins >= n_samples:
338341
raise ValueError(f'Number of bins = {bins} should be smaller than the length of y_true {n_samples}')
339-
342+
340343
if bins == 1:
341344
warnings.warn(f'You will get the only one bin of {n_samples} samples'
342345
f' which is the length of y_true.'
343346
f'\nPlease consider using uplift_at_k function instead',
344347
UserWarning)
345-
348+
346349
y_true, uplift, treatment = np.array(y_true), np.array(uplift), np.array(treatment)
347350
order = np.argsort(uplift, kind='mergesort')[::-1]
348-
351+
349352
if group == 'treatment':
350353
trmnt_flag = 1
351354
else: # group == 'control'
352355
trmnt_flag = 0
353-
356+
354357
if strategy == 'overall':
355358
y_true_bin = np.array_split(y_true[order], bins)
356359
trmnt_bin = np.array_split(treatment[order], bins)
357-
360+
358361
group_size = np.array([len(y[trmnt == trmnt_flag]) for y, trmnt in zip(y_true_bin, trmnt_bin)])
359362
response_rate = np.array([np.mean(y[trmnt == trmnt_flag]) for y, trmnt in zip(y_true_bin, trmnt_bin)])
360363

361364
else: # strategy == 'by_group'
362365
y_bin = np.array_split(y_true[order][treatment[order] == trmnt_flag], bins)
363-
366+
364367
group_size = np.array([len(y) for y in y_bin])
365368
response_rate = np.array([np.mean(y) for y in y_bin])
366369

367370
variance = np.multiply(response_rate, np.divide((1 - response_rate), group_size))
368-
369-
return response_rate, variance
371+
372+
return response_rate, variance
370373

371374

372375
def treatment_balance_curve(uplift, treatment, winsize):

0 commit comments

Comments
 (0)