@@ -75,7 +75,7 @@ def __init__(self, num_bandits=3, probs=None, payouts=None, live=False,
         self.stop_value = stop_criterion.get('value', 0.1)
 
         # Bandit selection strategies
-        self.strategies = ['eps_greedy', 'softmax', 'ucb', 'bayesian_bandit']
+        self.strategies = ['eps_greedy', 'softmax', 'ucb', 'bayesian']
 
     def run(self, trials=100, strategy=None, parameters=None):
         '''
@@ -169,13 +169,27 @@ def max_mean(self):
 
         return np.argmax(self.wins / (self.pulls + 0.1))
 
-    def bayesian_bandit(self, params):
+    def bayesian(self, params=None):
         '''
-        Run the Bayesian Bandit algorithm which utilizes a beta distribution for exploration and exploitation.
-        :param params:
-        :return:
+        Run the Bayesian Bandit algorithm which utilizes a beta distribution
+        for exploration and exploitation.
+
+        Parameters
+        ----------
+        params : None
+            For API consistency, this function can take a parameters argument,
+            but it is ignored.
+
+        Returns
+        -------
+        int
+            Index of chosen bandit
         '''
-        p_success_arms = [np.random.beta(self.wins[i] + 1, self.pulls[i] - self.wins[i] + 1) for i in range(len(self.wins))]
+        p_success_arms = [
+            np.random.beta(self.wins[i] + 1, self.pulls[i] - self.wins[i] + 1)
+            for i in range(len(self.wins))
+        ]
+
         return np.array(p_success_arms).argmax()
 
     def eps_greedy(self, params):
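
The renamed bayesian method is a Thompson-sampling step: each arm's win/loss counts parameterize a Beta posterior, one value is sampled per arm, and the arm with the largest sample is pulled. A minimal standalone sketch of that loop, assuming illustrative payout probabilities and local win/pull counters rather than the class's own state:

import numpy as np

rng = np.random.default_rng(0)
true_probs = [0.2, 0.5, 0.7]            # hypothetical payout probabilities, not from the repo
wins = np.zeros(len(true_probs))
pulls = np.zeros(len(true_probs))

for _ in range(1000):
    # Sample a plausible success rate per arm from Beta(wins + 1, losses + 1)
    samples = [rng.beta(wins[i] + 1, pulls[i] - wins[i] + 1)
               for i in range(len(true_probs))]
    arm = int(np.argmax(samples))       # pull the arm with the best sampled rate
    reward = rng.random() < true_probs[arm]
    wins[arm] += reward
    pulls[arm] += 1

print(pulls)   # the highest-probability arm should accumulate most of the pulls

The Beta(wins + 1, losses + 1) posterior starts every arm at a uniform belief, so under-explored arms keep producing occasional high samples and continue to be tried, which is how this strategy balances exploration and exploitation.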