Skip to content

Commit 02c769c

Browse files
committed
Changed API doc and code to support 0.4 feature set:
- Only handle binary outcomes (no variable payouts) - Handle historical payouts - `est_payouts` is now `est_probs`
1 parent b7f20ee commit 02c769c

File tree

2 files changed

+89
-61
lines changed

2 files changed

+89
-61
lines changed

docs/slots-docs.md

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,46 +13,49 @@ This document details the current and planned API for slots. Non-implemented fe
1313
1. Current choice
1414
2. number of trials completed for each arm
1515
3. scores for each arm
16-
4. average payout per arm (payout*wins/trials?)
16+
4. average payout per arm (wins/trials?)
1717
5. Current regret. Regret = Trials*mean_max - sum^T_t=1(reward_t)
1818
- See [ref](http://research.microsoft.com/en-us/um/people/sebubeck/SurveyBCB12.pdf)
1919
6. Use sane defaults.
2020
7. Be obvious and clean.
21+
8. For the time being handle only binary payouts.
2122

2223
### Library API ideas:
2324
#### Running slots with a live website
2425
```Python
25-
# Using slots to determine the best of 3 variations on a live website. 3 is the default.
26+
# Using slots to determine the best of 3 variations on a live website. 3 is the default number of bandits and epsilon greedy is the default strategy.
2627
mab = slots.MAB(3, live=True)
2728

2829
# Make the first choice randomly, record responses, and input reward
2930
# 2 was chosen.
30-
# Run online trial (input most recent result) until test criteria is met.
31+
# Update online trial (input most recent result) until test criteria is met.
3132
mab.online_trial(bandit=2,payout=1)
3233

3334
# Response of mab.online_trial() is a dict of the form:
3435
{'new_trial': boolean, 'choice': int, 'best': int}
3536

3637
# Where:
3738
# If the criterion is met, new_trial = False.
38-
# choice is the current choice of arm to try.
39+
# choice is the current choice of arm to try next.
3940
# best is the current best estimate of the highest payout arm.
4041
```
4142

4243
#### Creating a MAB test instance:
4344

4445
```Python
45-
# Default: 3 bandits with random p_i and pay_i = 1
46-
mab = slots.MAB(live=False)
46+
# Default: 3 bandits with random probabilities, p_i.
47+
mab = slots.MAB()
4748

48-
# Set up 4 bandits with random p_i and pay_i
49-
mab = slots.MAB(4, live=False)
49+
# Set up 4 bandits with random p_i.
50+
mab = slots.MAB(4)
5051

5152
# 4 bandits with specified p_i
52-
mab = slots.MAB(probs = [0.2,0.1,0.4,0.1], live=False)
53+
mab = slots.MAB(probs = [0.2,0.1,0.4,0.1])
5354

54-
# 3 bandits with specified pay_i
55-
mab = slots.MAB(payouts = [1,10,15], live=False)
55+
# Creating 3 bandits with historical payout data
56+
mab = slots.MAB(3, hist_payouts = np.array([[0,0,1,...],
57+
[1,0,0,...],
58+
[0,0,0,...]]))
5659
```
5760

5861
#### Running tests with strategy, S
@@ -98,8 +101,8 @@ mab.bandits.reset()
98101

99102
# Set probabilities or payouts
100103
# (NOT YET IMPLEMENTED)
101-
mab.bandits.probs_set([0.1,0.05,0.2,0.15])
102-
mab.bandits.payouts_set([1,1.5,0.5,0.8])
104+
mab.bandits.set_probs([0.1,0.05,0.2,0.15])
105+
mab.bandits.set_hist_payouts([[1,1,0,0],[0,1,0,0]])
103106
```
104107

105108
#### Displaying / retrieving test info
@@ -114,10 +117,10 @@ mab.prob_est()
114117

115118
# Retrieve bandit probability estimate of bandit i
116119
# (NOT YET IMPLEMENTED)
117-
mab.prob_est(i)
120+
mab.est_prob(i)
118121

119-
# Retrieve bandit payout estimates (p * payout)
120-
mab.est_payout()
122+
# Retrieve bandit probability estimates
123+
mab.est_probs()
121124

122125
# Retrieve current bandit choice
123126
# (NOT YET IMPLEMENTED, use mab.choices[-1])

slots/slots.py

Lines changed: 70 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,12 @@
1010
mab.best # Bandit with highest probability after T trials
1111
1212
- Run MAB test on "real" payout data (probabilities unknown).
13-
mab = slots.MAB(payouts = [0,0,0,1,0,0,0,0,0,....])
14-
mab.run(trials = 10000) # Max is length of payouts
13+
mab = slots.MAB(hist_payouts = [[0,0,...], [1,0,...], [0,1,...]])
14+
mab.run(trials = 10000)
15+
16+
- Run MAB test on "live" data
17+
mab = slots.MAB(num_bandits=3, live=True)
18+
mab.online_trial(bandit=1, payout=0)
1519
"""
1620

1721

@@ -27,66 +31,78 @@ def __init__(
2731
self,
2832
num_bandits=3,
2933
probs=None,
30-
payouts=None,
34+
hist_payouts=None,
3135
live=False,
3236
stop_criterion={"criterion": "regret", "value": 0.1},
3337
):
3438
"""
3539
Parameters
3640
----------
37-
num_bandits : int
41+
num_bandits : int, optional
3842
default is 3
39-
probs : np.array of floats
43+
probs : array of floats, optional
4044
payout probabilities
41-
payouts : np.array of floats
42-
If `live` is True, `payouts` should be None.
43-
live : bool
45+
hist_payouts : list of lists of ints, one array per bandit, optional
46+
This is for testing on historical data.
47+
If you set `probs` or `live` is True, `hist_payouts` should be None.
48+
live : bool, optional
4449
Whether the use is for a live, online trial.
45-
stop_criterion : dict
50+
stop_criterion : dict, optional
4651
Stopping criterion (str) and threshold value (float).
4752
"""
4853

4954
self.choices = []
5055

5156
if not probs:
52-
if not payouts:
57+
if not hist_payouts:
5358
if live:
5459
# Live trial scenario, where nothing is known except the
5560
# number of bandits
5661
self.bandits = Bandits(
57-
live=True, payouts=np.zeros(num_bandits), probs=None
62+
live=True, payouts=np.zeros(num_bandits)
5863
)
5964
else:
60-
# A pure experiment scenario with random probabilities
61-
# and single payout values are 1.
65+
# A pure experiment scenario with random probabilities.
6266
self.bandits = Bandits(
63-
probs=[np.random.rand() for x in range(num_bandits)],
64-
payouts=np.ones(num_bandits),
67+
probs=np.random.rand(num_bandits),
68+
payouts=np.zeros(num_bandits),
6569
live=False,
6670
)
6771
else:
6872
# Run strategies on known historical sequence of payouts. Probabilities are not known.
73+
num_bandits = len(hist_payouts)
6974
if live:
7075
print(
7176
"slots: Cannot have a defined array of payouts and live=True. live set to False"
7277
)
7378
self.bandits = Bandits(
74-
probs=[np.random.rand() for x in range(len(payouts))],
75-
payouts=payouts,
79+
hist_payouts=hist_payouts,
80+
payouts=np.zeros(num_bandits),
7681
live=False,
7782
)
78-
num_bandits = len(payouts)
7983
else:
80-
if payouts:
81-
# A pure experiment scenario with known probabilities and known single payout values.
82-
self.bandits = Bandits(probs=probs, payouts=payouts, live=False)
83-
num_bandits = len(payouts)
84+
if hist_payouts:
85+
# A pure experiment scenario with known historical payout values. Probabilities will be ignored.
86+
num_bandits = len(probs)
87+
print(
88+
"slots: Since historical payout data has been supplied, probabilities will be ignored."
89+
)
90+
if len(probs) == len(payouts):
91+
self.bandits = Bandits(
92+
hist_payouts=hist_payouts,
93+
live=False,
94+
payouts=np.zeros(num_bandits),
95+
)
96+
else:
97+
raise Exception(
98+
"slots: Dimensions of probs and payouts mismatched."
99+
)
84100
else:
85-
# A pure experiment scenario with known probabilities and single payout values of 1.
101+
# A pure experiment scenario with known probabilities
102+
num_bandits = len(probs)
86103
self.bandits = Bandits(
87-
probs=probs, payouts=np.ones(len(probs)), live=False
104+
probs=probs, payouts=np.zeros(num_bandits), live=False
88105
)
89-
num_bandits = len(probs)
90106

91107
self.wins = np.zeros(num_bandits)
92108
self.pulls = np.zeros(num_bandits)
@@ -123,12 +139,14 @@ def run(self, trials=100, strategy="eps_greedy", parameters=None):
123139
"""
124140

125141
if trials < 1:
126-
raise Exception("MAB.run: Number of trials cannot be less than 1!")
142+
raise Exception(
143+
"slots.MAB.run: Number of trials cannot be less than 1!"
144+
)
127145

128146
else:
129147
if strategy not in self.strategies:
130148
raise Exception(
131-
"MAB,run: Strategy name invalid. Choose from:"
149+
"slots.MAB,run: Strategy name invalid. Choose from:"
132150
" {}".format(", ".join(self.strategies))
133151
)
134152

@@ -346,7 +364,7 @@ def best(self):
346364
else:
347365
return np.argmax(self.wins / (self.pulls + 0.1))
348366

349-
def est_payouts(self):
367+
def est_probs(self):
350368
"""
351369
Calculate current estimate of average payout for each bandit.
352370
@@ -447,7 +465,11 @@ def online_trial(
447465
)
448466

449467
if self.crit_met():
450-
return {"new_trial": False, "choice": self.best(), "best": self.best()}
468+
return {
469+
"new_trial": False,
470+
"choice": self.best(),
471+
"best": self.best(),
472+
}
451473
else:
452474
return {
453475
"new_trial": True,
@@ -463,7 +485,7 @@ def update(self, bandit, payout):
463485
----------
464486
bandit : int
465487
Bandit index
466-
payout : float
488+
payout : int (0 or 1)
467489
468490
Returns
469491
-------
@@ -481,31 +503,27 @@ class Bandits:
481503
Bandit class.
482504
"""
483505

484-
def __init__(self, probs, payouts, live=True):
506+
def __init__(self, payouts, probs=None, hist_payouts=None, live=False):
485507
"""
486508
Instantiate Bandit class, determining
487509
- Probabilities of bandit payouts
488510
- Bandit payouts
489511
490512
Parameters
491513
----------
492-
probs: array of floats
493-
Probabilities of bandit payouts
494-
payouts : array of floats
495-
Amount of bandit payouts. If `live` is True, `payouts` should be an
496-
N length array of zeros.
497-
live : bool
514+
payouts : array of ints
515+
Cumulative bandit payouts. `payouts` should start as an N
516+
length array of zeros, where N is the number of bandits.
517+
probs: array of floats, optional
518+
Probabilities of bandit payouts.
519+
hist_payouts: list of arrays of ints, optional
520+
live : bool, optional
498521
"""
499522

500523
if not live:
501-
# Only use arrays of equal length
502-
if len(probs) != len(payouts):
503-
raise Exception(
504-
"Bandits.__init__: Probability and payouts "
505-
"arrays of different lengths!"
506-
)
507524
self.probs = probs
508525
self.payouts = payouts
526+
self.hist_payouts = hist_payouts
509527
self.live = False
510528
else:
511529
self.live = True
@@ -531,11 +549,18 @@ def pull(self, i):
531549
return self.payouts[i].pop()
532550
else:
533551
return None
552+
elif self.hist_payouts:
553+
if not hist[i]:
554+
return None
555+
else:
556+
_p = hist[i][0]
557+
hist[i] = hist[i][1:]
558+
return _p
534559
else:
535560
if np.random.rand() < self.probs[i]:
536-
return self.payouts[i]
561+
return 1
537562
else:
538-
return 0.0
563+
return 0
539564

540565
def info(self):
541566
pass

0 commit comments

Comments
 (0)