-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplaying_kuhn_AI.py
More file actions
179 lines (137 loc) · 7.58 KB
/
playing_kuhn_AI.py
File metadata and controls
179 lines (137 loc) · 7.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
from typing import List, Dict
import random
import numpy as np
import sys
random.seed(4)

Actions = ['B', 'C']  # bet/call vs check/fold


class InformationSet():
    """Regret-matching node for one information set (a card plus betting history).

    Stores the cumulative counterfactual regrets and the reach-weighted sum of
    the strategies played, from which CFR recovers the average strategy.
    """

    def __init__(self):
        # One slot per action in Actions.
        self.cumulative_regrets = np.zeros(shape=len(Actions))
        self.strategy_sum = np.zeros(shape=len(Actions))
        self.num_actions = len(Actions)

    def normalize(self, strategy: np.ndarray) -> np.ndarray:
        """Return `strategy` scaled to sum to 1.

        If there is no positive mass (e.g. all regrets clipped to zero), fall
        back to the uniform strategy. Unlike the original in-place `/=`, this
        never mutates the caller's array, so callers need no defensive copy.
        """
        total = strategy.sum()
        if total > 0:
            return strategy / total
        return np.full(self.num_actions, 1.0 / self.num_actions)

    def get_strategy(self, reach_probability: float) -> np.ndarray:
        """Return the regret-matching strategy and accumulate it into
        `strategy_sum`, weighted by this player's reach probability."""
        strategy = self.normalize(np.maximum(0, self.cumulative_regrets))
        self.strategy_sum += reach_probability * strategy
        return strategy

    def get_average_strategy(self) -> np.ndarray:
        """Average strategy over all iterations (the CFR output strategy)."""
        return self.normalize(self.strategy_sum)
class KuhnPoker():
    """Rules helpers for two-player Kuhn poker (card ranks J < Q < K)."""

    # Every betting sequence that ends a hand.
    TERMINAL_HISTORIES = frozenset({'BC', 'BB', 'CC', 'CBB', 'CBC'})

    @staticmethod
    def is_terminal(history: str) -> bool:
        """True once the betting sequence has ended the hand."""
        return history in KuhnPoker.TERMINAL_HISTORIES

    @staticmethod
    def get_payoff(history: str, cards: List[str]) -> int:
        """Payoff for the 'active' player (the one to move) at a terminal history."""
        # A fold ('BC' or 'CBC') gives the active player one chip outright.
        if history in ('BC', 'CBC'):
            return 1
        # Showdown ('CC', 'BB' or 'CBB'): two chips at stake if anyone bet.
        stake = 2 if 'B' in history else 1
        active = len(history) % 2
        mine = cards[active]
        theirs = cards[(active + 1) % 2]
        # With only J/Q/K dealt, holding the K — or facing the J — wins.
        return stake if mine == 'K' or theirs == 'J' else -stake
class KuhnCFRTrainer():
    """Chance-sampling counterfactual regret minimization for Kuhn poker."""

    def __init__(self):
        # Maps "card + betting history" (e.g. 'KCB') to its regret node.
        self.infoset_map: Dict[str, InformationSet] = {}

    def get_information_set(self, card_and_history: str) -> InformationSet:
        """Return the InformationSet for this key, creating it on first visit."""
        if card_and_history not in self.infoset_map:
            self.infoset_map[card_and_history] = InformationSet()
        return self.infoset_map[card_and_history]

    def cfr(self, cards: List[str], history: str, reach_probabilities: np.ndarray, active_player: int) -> float:
        """Recursively compute the counterfactual value of this node for
        `active_player`, accumulating regrets and strategy sums on the way.

        cards: the two dealt cards; cards[i] belongs to player i.
        history: betting actions taken so far ('B'/'C' characters).
        reach_probabilities: per-player probability of reaching this node.
        Returns the node's expected value for the active player.
        """
        if KuhnPoker.is_terminal(history):
            return KuhnPoker.get_payoff(history, cards)

        my_card = cards[active_player]
        info_set = self.get_information_set(my_card + history)
        strategy = info_set.get_strategy(reach_probabilities[active_player])
        opponent = (active_player + 1) % 2

        counterfactual_values = np.zeros(len(Actions))
        for ix, action in enumerate(Actions):
            action_probability = strategy[ix]
            # Only the player who just acted has their reach probability scaled.
            new_reach_probabilities = reach_probabilities.copy()
            new_reach_probabilities[active_player] *= action_probability
            # Zero-sum game: the child's value for the opponent is negated for us.
            counterfactual_values[ix] = -self.cfr(cards, history + action, new_reach_probabilities, opponent)

        # Node value: counterfactual values weighted by the action probabilities.
        node_value = counterfactual_values.dot(strategy)
        for ix in range(len(Actions)):
            # Regret updates are weighted by the opponent's reach probability.
            info_set.cumulative_regrets[ix] += reach_probabilities[opponent] * (counterfactual_values[ix] - node_value)
        return node_value  # counterfactual utility of being at this game node

    def train(self, num_iterations: int) -> float:
        """Run chance-sampling CFR for `num_iterations` deals.

        Returns the total (not average) utility accumulated for player 0.
        """
        util = 0.0
        kuhn_cards = ['J', 'Q', 'K']
        # `_` instead of `iter`: the original shadowed the builtin.
        for _ in range(num_iterations):
            cards = random.sample(kuhn_cards, 2)  # chance node: deal two cards
            reach_probabilities = np.ones(2)
            util += self.cfr(cards, '', reach_probabilities, 0)
        return util
if __name__ == "__main__":
    num_iterations = 200
    # Two fixed decimals, no scientific notation, when printing strategies.
    np.set_printoptions(precision=2, floatmode='fixed', suppress=True)

    cfr_trainer = KuhnCFRTrainer()
    util = cfr_trainer.train(num_iterations)

    # Print every information set's average strategy, shortest histories first.
    for name, info_set in sorted(cfr_trainer.infoset_map.items(), key=lambda s: len(s[0])):
        print(f"{name:3}: {info_set.get_average_strategy()}")
'''
Code to play AI after it has been trained
'''
# NOTE(review): this whole section is not runnable as written — it looks pasted
# from a different (regret-matching tutorial) script. `num_poker_rounds` below
# is a bare name with no assigned value (NameError on import), and `cards`,
# `active_player`, `history`, `reach_probabilities`, `self`, `Action`,
# `strategy_sum`, `getScoreAndPrintWinner`, `get_payoff`, `get_regrets` and
# `cumulative_regrets` are all undefined at module scope. Confirm the intended
# source before relying on any of it.
num_poker_rounds
p0_wins_counter = 0
p1_wins_counter = 0
for _ in range(num_poker_rounds):
    # Deal cards
    # compute the strategy according to regret matching
    my_card = cards[active_player]  # NOTE(review): `cards`/`active_player` undefined here
    #print('active_player: ', active_player, 'my_card + history: ', my_card + history)
    info_set = self.get_information_set(my_card + history)  # NOTE(review): `self` has no meaning at module level
    strategy = info_set.get_strategy(reach_probabilities[active_player])
    print('p0_strategy: ', strategy)
    # add the strategy to p0 running total of strategy probabilities
    strategy_sum += strategy
    # Choose p0 action and p0 opponent's action ----------------------------------
    p1_action = list(Action)[int(input('Enter 0, 1 or 2: '))] #p1_action = random.choices(list(Action), weights=fixed_p1_strategy)[0] # weights=strategy) for both agents to use the regret matching strategy
    print('p1_action: ', p1_action)
    p0_action = random.choices(list(Action), weights=strategy)[0]
    print('p0_action: ',p0_action)
    p0_wins_counter, p1_wins_counter = getScoreAndPrintWinner(p0_action, p1_action, p0_wins_counter, p1_wins_counter)
    # compute the payoff and regrets
    p0_payoff = get_payoff(p0_action, p1_action)
    regrets = get_regrets(p0_payoff, p1_action)
    # add regrets from this round to the cumulative regrets
    cumulative_regrets += regrets