Commit 0f69159

Merge branch PR #3
2 parents 471b075 + 7ad4b70 commit 0f69159

3 files changed: 15 additions, 1 deletion


README.md

Lines changed: 5 additions & 0 deletions
@@ -75,18 +75,22 @@ probs = [0.4, 0.9, 0.8]
 ba = slots.MAB(probs=probs)
 bb = slots.MAB(probs=probs)
 bc = slots.MAB(probs=probs)
+bd = slots.MAB(probs=probs)
 
 # Run trials and calculate the regret after each trial
 rega = []
 regb = []
 regc = []
+regd = []
 for t in range(10000):
     ba._run('eps_greedy')
     rega.append(ba.regret())
     bb._run('softmax')
     regb.append(bb.regret())
     bc._run('ucb')
     regc.append(bc.regret())
+    bd._run('bayesian_bandit')
+    regd.append(bd.regret())
 
 
 # Pretty plotting
@@ -97,6 +101,7 @@ plt.figure(figsize=(15,4))
 plt.plot(rega, label='$\epsilon$-greedy ($\epsilon$=0.1)')
 plt.plot(regb, label='Softmax ($T$=0.1)')
 plt.plot(regc, label='UCB')
+plt.plot(regd, label='Bayesian Bandit')
 plt.legend()
 plt.xlabel('Trials')
 plt.ylabel('Regret')
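For reference, the new strategy can also be exercised through the public run() method whose signature appears in the slots/slots.py diff below. This is a minimal sketch, not part of the commit, assuming only the MAB API used in the README example above (probs=, regret()); the variable names and trial count are illustrative:

import slots

# Hypothetical standalone check of the new strategy (illustrative only).
probs = [0.4, 0.9, 0.8]
mab = slots.MAB(probs=probs)
# Public entry point shown in the diff: run(trials=100, strategy=None, parameters=None)
mab.run(trials=10000, strategy='bayesian_bandit')
print(mab.regret())  # regret after the run, as in the README example above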

misc/regret_plot.png

9.43 KB

slots/slots.py

File mode changed: 100644 → 100755
Lines changed: 10 additions & 1 deletion
@@ -75,7 +75,7 @@ def __init__(self, num_bandits=3, probs=None, payouts=None, live=False,
         self.stop_value = stop_criterion.get('value', 0.1)
 
         # Bandit selection strategies
-        self.strategies = ['eps_greedy', 'softmax', 'ucb']
+        self.strategies = ['eps_greedy', 'softmax', 'ucb', 'bayesian_bandit']
 
     def run(self, trials=100, strategy=None, parameters=None):
         '''
@@ -169,6 +169,15 @@ def max_mean(self):
 
         return np.argmax(self.wins / (self.pulls + 0.1))
 
+    def bayesian_bandit(self, params):
+        '''
+        Run the Bayesian Bandit algorithm, which uses a Beta distribution to balance exploration and exploitation.
+        :param params: unused by this strategy; accepted for interface consistency
+        :return: index of the arm with the highest sampled success probability
+        '''
+        p_success_arms = [np.random.beta(self.wins[i] + 1, self.pulls[i] - self.wins[i] + 1) for i in range(len(self.wins))]
+        return np.array(p_success_arms).argmax()
+
     def eps_greedy(self, params):
         '''
         Run the epsilon-greedy strategy and update self.max_mean()
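The added method is a Thompson-sampling style selection rule: each arm's success probability is drawn from a Beta(wins + 1, pulls − wins + 1) posterior, and the arm with the largest draw is played. A minimal standalone sketch of that rule, using hypothetical counts not taken from the repository:

import numpy as np

# Hypothetical win/pull totals for three arms (illustrative only).
wins = np.array([12, 30, 25])
pulls = np.array([40, 45, 38])

# Draw one sample per arm from its Beta posterior, Beta(wins + 1, losses + 1),
# then play the arm with the largest sampled success probability.
samples = np.random.beta(wins + 1, pulls - wins + 1)
chosen_arm = int(np.argmax(samples))
print(chosen_arm)

Because under-explored arms have wide posteriors, they are occasionally sampled high and tried again, while well-observed good arms are exploited most of the time.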
