@@ -72,36 +72,34 @@ import slots
# Test multiple strategies for the same bandit probabilities
probs = [0.4, 0.9, 0.8]

# One record per strategy: the name passed to the bandit, the regret history
# collected below, and the legend label for the plot.
# Labels are raw strings so the mathtext backslashes (``\epsilon``) are not
# parsed as (invalid) Python escape sequences.
# NOTE(review): the labels quote epsilon=0.1 and T=0.1 -- presumably the
# defaults used by slots; confirm against the library.
strategies = [
    {'strategy': 'eps_greedy', 'regret': [],
     'label': r'$\epsilon$-greedy ($\epsilon$=0.1)'},
    {'strategy': 'softmax', 'regret': [],
     'label': r'Softmax ($T$=0.1)'},
    {'strategy': 'ucb', 'regret': [],
     'label': 'UCB1'},
    {'strategy': 'bayesian', 'regret': [],
     'label': 'Bayesian bandit'},
]

# Each strategy gets its own MAB instance built from the same probabilities.
for s in strategies:
    s['mab'] = slots.MAB(probs=probs)

# Run trials and calculate the regret after each trial
# NOTE(review): `_run` is underscore-prefixed (non-public by convention);
# confirm whether slots offers a public single-trial API.
for _ in range(10000):
    for s in strategies:
        s['mab']._run(s['strategy'])
        s['regret'].append(s['mab'].regret())

# Pretty plotting
sns.set_style('whitegrid')
sns.set_context('poster')

plt.figure(figsize=(15, 4))

# One curve per strategy, labelled for the legend.
for s in strategies:
    plt.plot(s['regret'], label=s['label'])

plt.legend()
plt.xlabel('Trials')
plt.ylabel('Regret')
0 commit comments