Skip to content

Commit c1789b4

Browse files
authored
🧁 Fix metrics: make string percentiles (#76)
* 🍰 Make index percentiles as strings
* 😋 Add str xticks uplift_by_perc
* 📝 Add string_percentiles to docstring
* 📝 Add checker string_percentiles
1 parent 6674a1b commit c1789b4

File tree

2 files changed

+42
-9
lines changed

2 files changed

+42
-9
lines changed

sklift/metrics/metrics.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -540,7 +540,8 @@ def weighted_average_uplift(y_true, uplift, treatment, strategy='overall', bins=
540540
return weighted_avg_uplift
541541

542542

543-
def uplift_by_percentile(y_true, uplift, treatment, strategy='overall', bins=10, std=False, total=False):
543+
def uplift_by_percentile(y_true, uplift, treatment, strategy='overall',
544+
bins=10, std=False, total=False, string_percentiles=True):
544545
"""Compute metrics: uplift, group size, group response rate, standard deviation at each percentile.
545546
546547
Metrics in columns and percentiles in rows of pandas DataFrame:
@@ -571,6 +572,7 @@ def uplift_by_percentile(y_true, uplift, treatment, strategy='overall', bins=10,
571572
The total uplift is a weighted average uplift. See :func:`.weighted_average_uplift`.
572573
The total response rate is a response rate on the full data amount.
573574
bins (int): Determines the number of bins (and the relative percentile) in the data. Default is 10.
575+
string_percentiles (bool): type of percentiles in the index: float or string. Default is True (string).
574576
575577
Returns:
576578
pandas.DataFrame: DataFrame where metrics are by columns and percentiles are by rows.
@@ -602,6 +604,10 @@ def uplift_by_percentile(y_true, uplift, treatment, strategy='overall', bins=10,
602604
if bins >= n_samples:
603605
raise ValueError(f'Number of bins = {bins} should be smaller than the length of y_true {n_samples}')
604606

607+
if not isinstance(string_percentiles, bool):
608+
raise ValueError(f'string_percentiles flag should be bool: True or False.'
609+
f' Invalid value string_percentiles: {string_percentiles}')
610+
605611
y_true, uplift, treatment = np.array(y_true), np.array(uplift), np.array(treatment)
606612

607613
response_rate_trmnt, variance_trmnt, n_trmnt = response_rate_by_percentile(
@@ -613,7 +619,12 @@ def uplift_by_percentile(y_true, uplift, treatment, strategy='overall', bins=10,
613619
uplift_scores = response_rate_trmnt - response_rate_ctrl
614620
uplift_variance = variance_trmnt + variance_ctrl
615621

616-
percentiles = [round(p * 100 / bins, 1) for p in range(1, bins + 1)]
622+
percentiles = [round(p * 100 / bins) for p in range(1, bins + 1)]
623+
624+
if string_percentiles:
625+
percentiles = [f"0-{percentiles[0]}"] + \
626+
[f"{percentiles[i]}-{percentiles[i + 1]}" for i in range(len(percentiles) - 1)]
627+
617628

618629
df = pd.DataFrame({
619630
'percentile': percentiles,

sklift/viz/base.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,8 @@ def plot_qini_curve(y_true, uplift, treatment, random=True, perfect=True, negati
146146
return ax
147147

148148

149-
def plot_uplift_by_percentile(y_true, uplift, treatment, strategy='overall', kind='line', bins=10):
149+
def plot_uplift_by_percentile(y_true, uplift, treatment, strategy='overall',
150+
kind='line', bins=10, string_percentiles=True):
150151
"""Plot uplift score, treatment response rate and control response rate at each percentile.
151152
152153
Treatment response rate is a target mean in the treatment group.
@@ -175,6 +176,7 @@ def plot_uplift_by_percentile(y_true, uplift, treatment, strategy='overall', kin
175176
Generates a traditional bar-style plot.
176177
177178
bins (int): Determines the number of bins (and the relative percentile) in the test data. Default is 10.
179+
string_percentiles (bool): type of xticks: float or string to plot. Default is True (string).
178180
179181
Returns:
180182
Object that stores computed values.
@@ -203,8 +205,12 @@ def plot_uplift_by_percentile(y_true, uplift, treatment, strategy='overall', kin
203205
raise ValueError(
204206
f'Number of bins = {bins} should be smaller than the length of y_true {n_samples}')
205207

208+
if not isinstance(string_percentiles, bool):
209+
raise ValueError(f'string_percentiles flag should be bool: True or False.'
210+
f' Invalid value string_percentiles: {string_percentiles}')
211+
206212
df = uplift_by_percentile(y_true, uplift, treatment, strategy=strategy,
207-
std=True, total=True, bins=bins)
213+
std=True, total=True, bins=bins, string_percentiles=False)
208214

209215
percentiles = df.index[:bins].values.astype(float)
210216

@@ -219,7 +225,8 @@ def plot_uplift_by_percentile(y_true, uplift, treatment, strategy='overall', kin
219225

220226
uplift_weighted_avg = df.loc['total', 'uplift']
221227

222-
check_consistent_length(percentiles, response_rate_trmnt, response_rate_ctrl, uplift_score,
228+
check_consistent_length(percentiles, response_rate_trmnt,
229+
response_rate_ctrl, uplift_score,
223230
std_trmnt, std_ctrl, std_uplift)
224231

225232
if kind == 'line':
@@ -235,7 +242,15 @@ def plot_uplift_by_percentile(y_true, uplift, treatment, strategy='overall', kin
235242

236243
if np.amin(uplift_score) < 0:
237244
axes.axhline(y=0, color='black', linewidth=1)
238-
axes.set_xticks(percentiles)
245+
246+
if string_percentiles: # string percentiles for plotting
247+
percentiles_str = [f"0-{percentiles[0]:.0f}"] + \
248+
[f"{percentiles[i]:.0f}-{percentiles[i + 1]:.0f}" for i in range(len(percentiles) - 1)]
249+
axes.set_xticks(percentiles)
250+
axes.set_xticklabels(percentiles_str, rotation=45)
251+
else:
252+
axes.set_xticks(percentiles)
253+
239254
axes.legend(loc='upper right')
240255
axes.set_title(
241256
f'Uplift by percentile\nweighted average uplift = {uplift_weighted_avg:.4f}')
@@ -245,8 +260,7 @@ def plot_uplift_by_percentile(y_true, uplift, treatment, strategy='overall', kin
245260

246261
else: # kind == 'bar'
247262
delta = percentiles[0]
248-
fig, axes = plt.subplots(ncols=1, nrows=2, figsize=(
249-
8, 6), sharex=True, sharey=True)
263+
fig, axes = plt.subplots(ncols=1, nrows=2, figsize=(8, 6), sharex=True, sharey=True)
250264
fig.text(0.04, 0.5, 'Uplift = treatment response rate - control response rate',
251265
va='center', ha='center', rotation='vertical')
252266

@@ -263,7 +277,15 @@ def plot_uplift_by_percentile(y_true, uplift, treatment, strategy='overall', kin
263277
axes[0].set_title(
264278
f'Uplift by percentile\nweighted average uplift = {uplift_weighted_avg:.4f}')
265279

266-
axes[1].set_xticks(percentiles)
280+
if string_percentiles: # string percentiles for plotting
281+
percentiles_str = [f"0-{percentiles[0]:.0f}"] + \
282+
[f"{percentiles[i]:.0f}-{percentiles[i + 1]:.0f}" for i in range(len(percentiles) - 1)]
283+
axes[1].set_xticks(percentiles)
284+
axes[1].set_xticklabels(percentiles_str, rotation=45)
285+
286+
else:
287+
axes[1].set_xticks(percentiles)
288+
267289
axes[1].legend(loc='upper right')
268290
axes[1].axhline(y=0, color='black', linewidth=1)
269291
axes[1].set_xlabel('Percentile')

0 commit comments

Comments
 (0)