1010 mab.best # Bandit with highest probability after T trials
1111
1212 - Run MAB test on "real" payout data (probabilities unknown).
13- mab = slots.MAB(payouts = [0,0,0,1,0,0,0,0,0,....])
14- mab.run(trials = 10000) # Max is length of payouts
13+ mab = slots.MAB(hist_payouts = [[0,0,...], [1,0,...], [0,1,...]])
14+ mab.run(trials = 10000)
15+
16+ - Run MAB test on "live" data
17+ mab = slots.MAB(num_bandits=3, live=True)
18+ mab.online_trial(bandit=1, payout=0)
1519"""
1620
1721
@@ -27,66 +31,78 @@ def __init__(
2731 self ,
2832 num_bandits = 3 ,
2933 probs = None ,
30- payouts = None ,
34+ hist_payouts = None ,
3135 live = False ,
3236 stop_criterion = {"criterion" : "regret" , "value" : 0.1 },
3337 ):
3438 """
3539 Parameters
3640 ----------
37- num_bandits : int
41+ num_bandits : int, optional
3842 default is 3
39- probs : np. array of floats
43+ probs : array of floats, optional
4044 payout probabilities
41- payouts : np.array of floats
42- If `live` is True, `payouts` should be None.
43- live : bool
45+ hist_payouts : list of lists of ints, one array per bandit, optional
46+ This is for testing on historical data.
47+ If `probs` is set or `live` is True, `hist_payouts` should be None.
48+ live : bool, optional
4449 Whether the use is for a live, online trial.
45- stop_criterion : dict
50+ stop_criterion : dict, optional
4651 Stopping criterion (str) and threshold value (float).
4752 """
4853
4954 self .choices = []
5055
5156 if not probs :
52- if not payouts :
57+ if not hist_payouts :
5358 if live :
5459 # Live trial scenario, where nothing is known except the
5560 # number of bandits
5661 self .bandits = Bandits (
57- live = True , payouts = np .zeros (num_bandits ), probs = None
62+ live = True , payouts = np .zeros (num_bandits )
5863 )
5964 else :
60- # A pure experiment scenario with random probabilities
61- # and single payout values are 1.
65+ # A pure experiment scenario with random probabilities.
6266 self .bandits = Bandits (
63- probs = [ np .random .rand () for x in range ( num_bandits )] ,
64- payouts = np .ones (num_bandits ),
67+ probs = np .random .rand (num_bandits ),
68+ payouts = np .zeros (num_bandits ),
6569 live = False ,
6670 )
6771 else :
6872 # Run strategies on known historical sequence of payouts. Probabilities are not known.
73+ num_bandits = len (hist_payouts )
6974 if live :
7075 print (
7176 "slots: Cannot have a defined array of payouts and live=True. live set to False"
7277 )
7378 self .bandits = Bandits (
74- probs = [ np . random . rand () for x in range ( len ( payouts ))] ,
75- payouts = payouts ,
79+ hist_payouts = hist_payouts ,
80+ payouts = np . zeros ( num_bandits ) ,
7681 live = False ,
7782 )
78- num_bandits = len (payouts )
7983 else :
80- if payouts :
81- # A pure experiment scenario with known probabilities and known single payout values.
82- self .bandits = Bandits (probs = probs , payouts = payouts , live = False )
83- num_bandits = len (payouts )
84+ if hist_payouts :
85+ # A pure experiment scenario with known historical payout values. Probabilities will be ignored.
86+ num_bandits = len (probs )
87+ print (
88+ "slots: Since historical payout data has been supplied, probabilities will be ignored."
89+ )
90+ if len (probs ) == len (payouts ):
91+ self .bandits = Bandits (
92+ hist_payouts = hist_payouts ,
93+ live = False ,
94+ payouts = np .zeros (num_bandits ),
95+ )
96+ else :
97+ raise Exception (
98+ "slots: Dimensions of probs and payouts mismatched."
99+ )
84100 else :
85- # A pure experiment scenario with known probabilities and single payout values of 1.
101+ # A pure experiment scenario with known probabilities
102+ num_bandits = len (probs )
86103 self .bandits = Bandits (
87- probs = probs , payouts = np .ones ( len ( probs ) ), live = False
104+ probs = probs , payouts = np .zeros ( num_bandits ), live = False
88105 )
89- num_bandits = len (probs )
90106
91107 self .wins = np .zeros (num_bandits )
92108 self .pulls = np .zeros (num_bandits )
@@ -123,12 +139,14 @@ def run(self, trials=100, strategy="eps_greedy", parameters=None):
123139 """
124140
125141 if trials < 1 :
126- raise Exception ("MAB.run: Number of trials cannot be less than 1!" )
142+ raise Exception (
143+ "slots.MAB.run: Number of trials cannot be less than 1!"
144+ )
127145
128146 else :
129147 if strategy not in self .strategies :
130148 raise Exception (
131- "MAB,run: Strategy name invalid. Choose from:"
149+ "slots. MAB,run: Strategy name invalid. Choose from:"
132150 " {}" .format (", " .join (self .strategies ))
133151 )
134152
@@ -346,7 +364,7 @@ def best(self):
346364 else :
347365 return np .argmax (self .wins / (self .pulls + 0.1 ))
348366
349- def est_payouts (self ):
367+ def est_probs (self ):
350368 """
351369 Calculate current estimate of average payout for each bandit.
352370
@@ -447,7 +465,11 @@ def online_trial(
447465 )
448466
449467 if self .crit_met ():
450- return {"new_trial" : False , "choice" : self .best (), "best" : self .best ()}
468+ return {
469+ "new_trial" : False ,
470+ "choice" : self .best (),
471+ "best" : self .best (),
472+ }
451473 else :
452474 return {
453475 "new_trial" : True ,
@@ -463,7 +485,7 @@ def update(self, bandit, payout):
463485 ----------
464486 bandit : int
465487 Bandit index
466- payout : float
488+ payout : int (0 or 1)
467489
468490 Returns
469491 -------
@@ -481,31 +503,27 @@ class Bandits:
481503 Bandit class.
482504 """
483505
484- def __init__ (self , probs , payouts , live = True ):
506+ def __init__ (self , payouts , probs = None , hist_payouts = None , live = False ):
485507 """
486508 Instantiate Bandit class, determining
487509 - Probabilities of bandit payouts
488510 - Bandit payouts
489511
490512 Parameters
491513 ----------
492- probs: array of floats
493- Probabilities of bandit payouts
494- payouts : array of floats
495- Amount of bandit payouts. If `live` is True, `payouts` should be an
496- N length array of zeros.
497- live : bool
514+ payouts : array of ints
515+ Cumulative bandit payouts. `payouts` should start as an N
516+ length array of zeros, where N is the number of bandits.
517+ probs: array of floats, optional
518+ Probabilities of bandit payouts.
519+ hist_payouts: list of arrays of ints, optional
520+ live : bool, optional
498521 """
499522
500523 if not live :
501- # Only use arrays of equal length
502- if len (probs ) != len (payouts ):
503- raise Exception (
504- "Bandits.__init__: Probability and payouts "
505- "arrays of different lengths!"
506- )
507524 self .probs = probs
508525 self .payouts = payouts
526+ self .hist_payouts = hist_payouts
509527 self .live = False
510528 else :
511529 self .live = True
@@ -531,11 +549,18 @@ def pull(self, i):
531549 return self .payouts [i ].pop ()
532550 else :
533551 return None
552+ elif self .hist_payouts :
553+ if not hist [i ]:
554+ return None
555+ else :
556+ _p = hist [i ][0 ]
557+ hist [i ] = hist [i ][1 :]
558+ return _p
534559 else :
535560 if np .random .rand () < self .probs [i ]:
536- return self . payouts [ i ]
561+ return 1
537562 else :
538- return 0.0
563+ return 0
539564
540565 def info (self ):
541566 pass
0 commit comments