@@ -23,7 +23,7 @@ class DBS(Player):
     violation_threshold and rejection_threshold

     Parameters
-    ----------
+
     discount_factor : float, optional
         used when computing discounted frequencies to learn opponent's
         strategy. Must be between 0 and 1. The default is 0.75
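A rough numeric illustration of the discount factor (not part of the module): with discount_factor = 0.75 the opponent's most recent reaction to a given condition carries weight 1, the one before 0.75, the one before that 0.5625, so recent behaviour dominates the learned frequencies.

    # Hypothetical illustration only: weights applied to the opponent's
    # reactions, most recent first, when discount_factor = 0.75.
    discount_factor = 0.75
    weights = [discount_factor ** k for k in range(4)]
    print(weights)  # [1.0, 0.75, 0.5625, 0.421875]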
@@ -100,6 +100,9 @@ def reset(self):
         self.history_by_cond[(D, D)] = ([0], [1])

     def should_promote(self, r_plus, promotion_threshold=3):
+        """
+
+        """
         if r_plus[1] == C:
             opposite_action = 0
         elif r_plus[1] == D:
@@ -119,7 +122,7 @@ def should_promote(self, r_plus, promotion_threshold=3):
             if (self.history_by_cond[r_plus[0]][1][1:][-k] == 1):
                 count += 1
             k += 1
-        if (count >= promotion_threshold):
+        if (count >= promotion_threshold):
             return True
         return False

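Loosely, should_promote walks backwards through the reactions recorded for the condition r_plus[0] and counts how many recent observations agree with r_plus before a counter-example is met; once the count reaches promotion_threshold the rule is promoted to a deterministic one. A simplified standalone sketch of that counting idea (hypothetical helper, not the class method; the real code works on the history_by_cond bookkeeping shown above):

    def should_promote_sketch(recent_reactions, expected, promotion_threshold=3):
        # Count the trailing streak of reactions equal to `expected`;
        # a differing reaction is a counter-example and ends the streak.
        count = 0
        for reaction in reversed(recent_reactions):
            if reaction != expected:
                break
            count += 1
        return count >= promotion_threshold

    # The opponent answered C to the same condition three times in a row:
    print(should_promote_sketch(["D", "C", "C", "C"], "C"))  # True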
@@ -147,14 +150,13 @@ def compute_prob_rule(self, outcome, alpha):
         discounted_f = 0
         alpha_k = 1
         for g, f in zip(G[::-1], F[::-1]):
-            discounted_g += alpha_k * g
-            discounted_f += alpha_k * f
-            alpha_k = alpha * alpha_k
+            discounted_g += alpha_k * g
+            discounted_f += alpha_k * f
+            alpha_k = alpha * alpha_k
         p_cond = discounted_g / discounted_f
         return p_cond

     def strategy(self, opponent: Player) -> Action:
-        """This is the actual strategy"""

         # First move
         if not self.history:
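The loop in compute_prob_rule gives the most recent observation weight 1 and multiplies the weight by alpha for each step further back, so the returned value is a discounted cooperation frequency, sum(alpha**k * g_k) / sum(alpha**k * f_k) with k counted from the newest entry. A standalone re-run of the same loop on toy data (sketch only, outside the class):

    def discounted_cooperation_rate(G, F, alpha=0.75):
        # Same arithmetic as compute_prob_rule: newest entry weighted 1,
        # the previous one alpha, the one before alpha**2, and so on.
        discounted_g = discounted_f = 0.0
        alpha_k = 1.0
        for g, f in zip(G[::-1], F[::-1]):
            discounted_g += alpha_k * g
            discounted_f += alpha_k * f
            alpha_k *= alpha
        return discounted_g / discounted_f

    # The opponent cooperated on the two most recent occurrences of the condition:
    print(discounted_cooperation_rate([1, 0, 1, 1], [1, 1, 1, 1]))  # ~0.79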
@@ -195,14 +197,14 @@ def strategy(self, opponent: Player) -> Action:

         # r+ in Rc
         r_plus_in_Rc = (
-            r_plus[0] in self.Rc.keys()
-            and self.Rc[r_plus[0]] == action_to_int(r_plus[1])
-        )
+            r_plus[0] in self.Rc.keys()
+            and self.Rc[r_plus[0]] == action_to_int(r_plus[1])
+        )
         # r- in Rd
         r_minus_in_Rd = (
-            r_minus[0] in self.Rd.keys()
-            and self.Rd[r_minus[0]] == action_to_int(r_minus[1])
-        )
+            r_minus[0] in self.Rd.keys()
+            and self.Rd[r_minus[0]] == action_to_int(r_minus[1])
+        )

         if r_minus_in_Rd:
             self.v += 1
@@ -217,7 +219,7 @@ def strategy(self, opponent: Player) -> Action:
         all_cond = [(C, C), (C, D), (D, C), (D, D)]
         for outcome in all_cond:
             if ((outcome not in self.Rc.keys())
-                    and (outcome not in self.Rd.keys())):
+                    and (outcome not in self.Rd.keys())):
                 # then we need to compute opponent's C answer probability
                 Rp[outcome] = self.compute_prob_rule(outcome, self.alpha)

@@ -229,13 +231,13 @@ def strategy(self, opponent: Player) -> Action:

         # React to the opponent's last move
         return MoveGen((self.history[-1], opponent.history[-1]), self.Pi,
-                       depth_search_tree=self.tree_depth)
+                       depth_search_tree=self.tree_depth)


 class Node(object):
     """
     Nodes used to build a tree for the tree-search procedure
-    The tree has Determinist ans Stochastic nodes, as the opponent's
+    The tree has Deterministic and Stochastic nodes, as the opponent's
     strategy is learned as a probability distribution
     """

@@ -262,17 +264,17 @@ def __init__(self, own_action, pC, depth):

     def get_siblings(self):
         # siblings of a stochastic node get depth += 1
-        opponent_c_choice = DeterministNode(self.own_action, C, self.depth + 1)
-        opponent_d_choice = DeterministNode(self.own_action, D, self.depth + 1)
+        opponent_c_choice = DeterministicNode(self.own_action, C, self.depth + 1)
+        opponent_d_choice = DeterministicNode(self.own_action, D, self.depth + 1)
         return (opponent_c_choice, opponent_d_choice)

     def is_stochastic(self):
         return True


-class DeterministNode(Node):
+class DeterministicNode(Node):
     """
-    Nodes (C, C), (C, D), (D, C), or (D, D) with determinist choice
+    Nodes (C, C), (C, D), (D, C), or (D, D) with deterministic choice
     for siblings
     """

@@ -288,11 +290,11 @@ def get_siblings(self, policy):
         same depth
         """
         c_choice = StochasticNode(
-            C, policy[(self.action1, self.action2)], self.depth
-        )
+            C, policy[(self.action1, self.action2)], self.depth
+        )
         d_choice = StochasticNode(
-            D, policy[(self.action1, self.action2)], self.depth
-        )
+            D, policy[(self.action1, self.action2)], self.depth
+        )
         return (c_choice, d_choice)

     def is_stochastic(self):
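Taken together, the two get_siblings methods define how the search tree alternates: a deterministic node (a concrete joint outcome) expands into two stochastic nodes, one per own action, and a stochastic node expands into the two deterministic nodes for the opponent's possible answers one level deeper. A minimal standalone sketch of that alternation with simplified stand-in classes (not the library's Node classes):

    class DetSketch:
        # a concrete joint outcome (own move, opponent move) at some depth
        def __init__(self, own, opp, depth):
            self.own, self.opp, self.depth = own, opp, depth

        def siblings(self, policy):
            p_c = policy[(self.own, self.opp)]  # learned P(opponent cooperates)
            return (StochSketch("C", p_c, self.depth),
                    StochSketch("D", p_c, self.depth))

    class StochSketch:
        # own move chosen, opponent's answer still unknown
        def __init__(self, own, p_c, depth):
            self.own, self.p_c, self.depth = own, p_c, depth

        def siblings(self):
            # the opponent's possible answers live one level deeper
            return (DetSketch(self.own, "C", self.depth + 1),
                    DetSketch(self.own, "D", self.depth + 1))

    root = DetSketch("C", "C", depth=0)
    stoch_c, stoch_d = root.siblings({("C", "C"): 0.9})
    print([node.depth for node in stoch_c.siblings()])  # [1, 1]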
@@ -315,12 +317,7 @@ def create_policy(pCC, pCD, pDC, pDD):
     where p is the probability to cooperate after prev_move,
     where prev_move can be (C, C), (C, D), (D, C) or (D, D)
     """
-    pol = {}
-    pol[(C, C)] = pCC
-    pol[(C, D)] = pCD
-    pol[(D, C)] = pDC
-    pol[(D, D)] = pDD
-    return pol
+    return {(C, C): pCC, (C, D): pCD, (D, C): pDC, (D, D): pDD}


 def action_to_int(action):
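create_policy simply packs the four conditional cooperation probabilities into the dict consumed by the tree search, keyed by the previous joint move. A quick usage sketch with illustrative values (C and D stand in for the library's Action values; the numbers are not the strategy's defaults):

    C, D = "C", "D"  # stand-ins for the Action enum

    def create_policy(pCC, pCD, pDC, pDD):
        return {(C, C): pCC, (C, D): pCD, (D, C): pDC, (D, D): pDD}

    policy = create_policy(1, 1, 0.25, 0)
    print(policy[(D, C)])  # 0.25 -> opponent cooperates after (D, C) a quarter of the time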
@@ -343,16 +340,12 @@ def minimax_tree_search(begin_node, policy, max_depth):
         # The stochastic node value is the expected values of siblings
         node_value = (
             begin_node.pC * minimax_tree_search(
-                siblings[0],
-                policy,
-                max_depth)
+                siblings[0], policy, max_depth)
             + (1 - begin_node.pC) * minimax_tree_search(
-                siblings[1],
-                policy,
-                max_depth)
+                siblings[1], policy, max_depth)
         )
         return node_value
-    else:  # determinist node
+    else:  # deterministic node
         if begin_node.depth == max_depth:
             # this is an end node, we just return its outcome value
             return begin_node.get_value()
@@ -368,7 +361,7 @@ def minimax_tree_search(begin_node, policy, max_depth):
             )
         elif begin_node.depth < max_depth:
             siblings = begin_node.get_siblings(policy)
-            # the determinist node value is the max of both siblings values
+            # the deterministic node value is the max of both siblings values
             # + the score of the outcome of the node
             a = minimax_tree_search(siblings[0], policy, max_depth)
             b = minimax_tree_search(siblings[1], policy, max_depth)
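The two branches above implement an expectimax-style recursion: a stochastic node is worth the probability-weighted average of the two possible opponent answers, while a deterministic node is worth its own outcome value plus the better of the two own-action choices, cut off once max_depth is reached. A compressed standalone sketch of the same recursion (hypothetical payoff values; depth counts down instead of up):

    # Prisoner's dilemma payoffs for (own move, opponent move); illustrative only.
    PAYOFF = {("C", "C"): 3, ("C", "D"): 0, ("D", "C"): 5, ("D", "D"): 1}

    def expectimax(prev, policy, depth):
        if depth == 0:
            return PAYOFF[prev]                 # leaf: just the outcome value
        p_c = policy[prev]                      # learned P(opponent cooperates | prev)
        best = max(
            # expectation over the opponent's answer, then recurse one level deeper
            p_c * expectimax((own, "C"), policy, depth - 1)
            + (1 - p_c) * expectimax((own, "D"), policy, depth - 1)
            for own in ("C", "D")
        )
        return PAYOFF[prev] + best

    policy = {("C", "C"): 0.9, ("C", "D"): 0.5, ("D", "C"): 0.5, ("D", "D"): 0.1}
    print(expectimax(("C", "C"), policy, depth=2))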
@@ -381,7 +374,7 @@ def MoveGen(outcome, policy, depth_search_tree=5):
     returns the best move considering opponent's policy and last move,
     using tree-search procedure
     """
-    current_node = DeterministNode(outcome[0], outcome[1], depth=0)
+    current_node = DeterministicNode(outcome[0], outcome[1], depth=0)
     values_of_choices = minimax_tree_search(
         current_node, policy, depth_search_tree)
     # returns the Action which correspond to the best choice in terms of
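Usage-wise, MoveGen is what strategy() calls each turn: it roots a deterministic node at the previous joint outcome, runs the tree search against the learned policy Pi, and returns the higher-scoring action. A hedged usage sketch; the import paths assume a recent release of the Axelrod library where this module lives at axelrod/strategies/dbs.py:

    from axelrod.action import Action
    from axelrod.strategies.dbs import MoveGen, create_policy

    C, D = Action.C, Action.D
    policy = create_policy(1, 1, 0.25, 0)  # illustrative probabilities, not the defaults
    print(MoveGen((C, C), policy, depth_search_tree=5))  # best reply, Action.C or Action.D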