
Commit 310643a

Doc string, docs, and minor refactor for Bayesian bandits.
1 parent: 0f69159

File tree

3 files changed: +22 −8 lines changed

README.md

Lines changed: 1 addition & 2 deletions
@@ -89,7 +89,7 @@ for t in range(10000):
     regb.append(bb.regret())
     bc._run('ucb')
     regc.append(bc.regret())
-    bd._run('bayesian_bandit')
+    bd._run('bayesian')
     regd.append(bd.regret())
 
 
@@ -116,6 +116,5 @@ For documentation on the slots API, see [slots-docs.md](https://github.com/royco
 
 ### Todo list:
 - More MAB strategies
-- Bayesian bandits
 - Argument to save regret values after each trial in an array.
 - TESTS!

docs/slots-docs.md

Lines changed: 1 addition & 0 deletions
@@ -127,6 +127,7 @@ mab.strategy_info()
 - [x] Softmax
 - [ ] Softmax decreasing
 - [x] Upper credible bound
+- [x] Bayesian bandits
 
 ###Example: Running slots with a live website
 ```Python

slots/slots.py

Lines changed: 20 additions & 6 deletions
@@ -75,7 +75,7 @@ def __init__(self, num_bandits=3, probs=None, payouts=None, live=False,
         self.stop_value = stop_criterion.get('value', 0.1)
 
         # Bandit selection strategies
-        self.strategies = ['eps_greedy', 'softmax', 'ucb', 'bayesian_bandit']
+        self.strategies = ['eps_greedy', 'softmax', 'ucb', 'bayesian']
 
     def run(self, trials=100, strategy=None, parameters=None):
         '''
@@ -169,13 +169,27 @@ def max_mean(self):
 
         return np.argmax(self.wins / (self.pulls + 0.1))
 
-    def bayesian_bandit(self, params):
+    def bayesian(self, params=None):
         '''
-        Run the Bayesian Bandit algorithm which utilizes a beta distribution for exploration and exploitation.
-        :param params:
-        :return:
+        Run the Bayesian Bandit algorithm which utilizes a beta distribution
+        for exploration and exploitation.
+
+        Parameters
+        ----------
+        params : None
+            For API consistency, this function can take a parameters argument,
+            but it is ignored.
+
+        Returns
+        -------
+        int
+            Index of chosen bandit
         '''
-        p_success_arms = [np.random.beta(self.wins[i] + 1, self.pulls[i] - self.wins[i] + 1) for i in range(len(self.wins))]
+        p_success_arms = [
+            np.random.beta(self.wins[i] + 1, self.pulls[i] - self.wins[i] + 1)
+            for i in range(len(self.wins))
+        ]
+
         return np.array(p_success_arms).argmax()
 
     def eps_greedy(self, params):
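The `bayesian` method above is standard Thompson sampling with a Beta-Bernoulli model: each arm's success rate gets a Beta posterior (a uniform Beta(1, 1) prior updated with observed wins and losses), one sample is drawn per arm, and the arm with the largest draw is played. A minimal standalone sketch of the same selection rule, kept outside the class for clarity (the function name `bayesian_choice` is ours, not part of slots):

```python
import numpy as np

def bayesian_choice(wins, pulls):
    # Thompson sampling: draw one sample from each arm's Beta posterior
    # (Beta(wins + 1, losses + 1), i.e. a uniform prior) and play the
    # arm with the largest sampled success rate.
    samples = [np.random.beta(wins[i] + 1, pulls[i] - wins[i] + 1)
               for i in range(len(wins))]
    return int(np.argmax(samples))

# With no data, every arm has the same Beta(1, 1) posterior, so the
# choice is effectively uniform at random -- exploration is automatic.
# As evidence accumulates, the posterior of the best arm concentrates
# and it is chosen almost every time.
arm = bayesian_choice([0, 0, 0], [0, 0, 0])
```

Because the randomness lives in the posterior draws themselves, no explicit exploration parameter is needed, which is why the method ignores `params`.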
