-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathagent.py
More file actions
245 lines (205 loc) · 9.16 KB
/
agent.py
File metadata and controls
245 lines (205 loc) · 9.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
import numpy as np
from game import ConnectFour
from neural_network import NeuralNetwork
import copy
class Agent():
    """Base class for a Connect Four playing agent.

    Holds the game reference, which player the agent controls, and the
    reward weights used by evaluate_reward(). Subclasses override act()
    (move selection) and reinforce() (learning update).
    """

    # Reward keys that must be present and non-None.
    _REQUIRED_SCORE_KEYS = ('win', 'loss', 'draw', 'ongoing', 'three_in_a_row')

    def __init__(
        self,
        is_player_one: bool,
        game=None,
        results_scores=None
    ):
        """Initialize the agent.

        Args:
            is_player_one: True if this agent plays the +1 pieces.
            game: the ConnectFour game instance (may be set later via set_game).
            results_scores: reward weights; defaults are built per-instance to
                avoid the shared mutable-default-argument pitfall.

        Raises:
            Exception: if any required reward key is missing or None.
        """
        if results_scores is None:
            results_scores = {'win': 30, 'loss': -30, 'draw': -3,
                              'ongoing': -0.001, 'three_in_a_row': 0.75,
                              'block': 1}
        # Validate required keys up front. (The original indexed the dict
        # directly, so a missing key raised KeyError instead of this check.)
        for key in self._REQUIRED_SCORE_KEYS:
            if results_scores.get(key) is None:
                raise Exception(f"results_scores missing required key: {key!r}")
        # Initialize values
        self._game = game
        self._is_player_one = is_player_one
        self._results_scores = results_scores
        # Three-in-a-row counts from the previous evaluated state, so only
        # newly created threes are rewarded/penalized.
        self._prev_state_threes_player = 0
        self._prev_state_threes_opponent = 0

    def evaluate_reward(self):
        """Return the reward for the current game state.

        Terminal states score win/loss/draw; non-terminal states get a small
        step cost. Newly formed three-in-a-rows since the previous call add
        (for the agent) or subtract (for the opponent) the
        'three_in_a_row' weight.
        """
        reward = 0
        winner = self._game.get_winner()
        if winner == 1:  # Player 1 wins
            reward += self._results_scores['win' if self._is_player_one else 'loss']
        elif winner == -1:  # Player 2 wins
            reward += self._results_scores['loss' if self._is_player_one else 'win']
        elif winner == 2:  # Draw
            reward += self._results_scores['draw']
        else:
            # Game is not complete: small per-step cost. The original added
            # this unconditionally, contradicting its own comment; at |0.001|
            # the difference is negligible, and the conditional matches the
            # stated intent.
            reward += self._results_scores['ongoing']
        # Reward newly created threes and penalize the opponent's new threes.
        curr_player, curr_opponent = self._count_threes()
        weight = self._results_scores['three_in_a_row']
        reward += (curr_player - self._prev_state_threes_player) * weight
        reward -= (curr_opponent - self._prev_state_threes_opponent) * weight
        # Remember current counts for the next incremental evaluation.
        self._prev_state_threes_player = curr_player
        self._prev_state_threes_opponent = curr_opponent
        return reward

    def _count_threes(self):
        """Count three-in-a-row runs on the board.

        Returns:
            (player_count, opponent_count) over rows, columns and both
            diagonals. Replaces four near-identical scan loops with one
            pass per direction vector.
        """
        board = self._game.get_board()
        rows = len(board)
        cols = len(board[0])
        player_mark = 1 if self._is_player_one else -1
        player_count = 0
        opponent_count = 0
        # (di, dj): horizontal, vertical, TL-BR diagonal, BL-TR diagonal.
        for di, dj in ((0, 1), (1, 0), (1, 1), (1, -1)):
            for i in range(rows):
                for j in range(cols):
                    i2, j2 = i + 2 * di, j + 2 * dj
                    if not (0 <= i2 < rows and 0 <= j2 < cols):
                        continue
                    cell = board[i][j]
                    if cell != 0 and cell == board[i + di][j + dj] and cell == board[i2][j2]:
                        if cell == player_mark:
                            player_count += 1
                        else:
                            opponent_count += 1
        return player_count, opponent_count

    def sense(self):
        """Return the legality mask of moves from the game."""
        return self._game.get_moves()

    def act(self, eps):
        """Select a move; overridden by subclasses."""
        pass

    def reinforce(self):
        """Apply a learning update; overridden by learning subclasses."""
        pass

    def set_game(self, game):
        """Attach the agent to a (new) game instance."""
        self._game = game

    def set_player_number(self, player_num: int):
        """Set which side this agent plays (1 or 2).

        Raises:
            Exception: if player_num is not 1 or 2.
        """
        if player_num not in (1, 2):
            raise Exception(f"player_num must be 1 or 2, got {player_num}")
        self._is_player_one = (player_num == 1)

    def is_player_one(self):
        """Return True if this agent plays the +1 pieces."""
        return self._is_player_one

    def reset_prev_state_threes_counts(self):
        """Reset the incremental three-in-a-row baselines (call on new game)."""
        self._prev_state_threes_player = 0
        self._prev_state_threes_opponent = 0
class UserAgent(Agent):
    """Agent that asks a human player for moves via stdin."""

    def __init__(
        self,
        is_player_one: bool,
        results_scores=None,
        game=None
    ):
        """Initialize with human-play reward defaults (built per-instance to
        avoid the shared mutable-default-argument pitfall)."""
        if results_scores is None:
            results_scores = {'win': 30, 'loss': -30, 'draw': -3,
                              'ongoing': -0.001, 'three_in_a_row': 1}
        super().__init__(is_player_one, game, results_scores)

    def act(self, eps):
        """Prompt until the user enters a legal column index; eps is unused.

        Returns:
            The chosen legal column as an int.
        """
        while True:
            try:
                move = int(input("Please input a move: "))
            except ValueError:
                # The original's bare except fell through with `move` unbound,
                # raising UnboundLocalError on the legality check below;
                # re-prompt instead. ValueError (not bare except) so that
                # EOFError/KeyboardInterrupt still propagate.
                print("Error: please input an integer in the range [0, 7)")
                continue
            if self._game.is_legal_move(move):
                return move
            print("Error: please input a legal move")
class RandomAgent(Agent):
    """Agent that plays a uniformly random legal move (useful as a baseline)."""

    def __init__(
        self,
        is_player_one: bool,
        results_scores=None,
        game=None
    ):
        """Initialize with baseline reward defaults (built per-instance to
        avoid the shared mutable-default-argument pitfall)."""
        if results_scores is None:
            results_scores = {'win': 30, 'loss': -30, 'draw': -3,
                              'ongoing': -0.001, 'three_in_a_row': 1}
        super().__init__(is_player_one, game, results_scores)

    def act(self, eps):
        """Return a uniformly random legal column index; eps is unused.

        Raises:
            Exception: if no legal move exists (board full). The original
                reached np.random.randint(high=0) first, which raised an
                opaque ValueError instead of this message.
        """
        legal_columns = [i for i, legal in enumerate(self.sense()) if legal]
        if not legal_columns:
            raise Exception("Error: no legal moves available")
        # Single RNG draw, uniform over legal columns — same distribution
        # (and same np.random stream usage) as the original scan.
        return legal_columns[np.random.randint(low=0, high=len(legal_columns))]
class ConnectFourAgent(Agent):
    """DQN-style agent: epsilon-greedy move selection over network Q-values.

    Keeps a prediction network (trained every step) and a target network
    (periodically synced) for stable Q-learning targets.
    """

    def __init__(
        self,
        is_player_one: bool,
        nn: NeuralNetwork,
        results_scores=None,
        game=None
    ):
        """Initialize with a neural network and learning reward defaults
        (built per-instance to avoid the mutable-default-argument pitfall)."""
        if results_scores is None:
            results_scores = {'win': 30, 'loss': -30, 'draw': -3,
                              'ongoing': -0.001, 'three_in_a_row': 0.75}
        super().__init__(is_player_one, game, results_scores)
        self.nn_pred = nn
        # Deep-copy so the target network is an independent frozen snapshot.
        # The original aliased the SAME object (self.nn_target = nn), so every
        # prediction-network update leaked into the target, defeating its
        # stabilizing purpose.
        self.nn_target = copy.deepcopy(nn)

    def act(self, eps):
        """Epsilon-greedy action selection.

        Args:
            eps: exploration probability in [0, 1].

        Returns:
            A legal column index; random with probability eps, otherwise the
            legal move with the highest predicted Q-value.

        Raises:
            Exception: if no legal move exists.
        """
        if np.random.rand() < eps:
            # Explore: uniformly random legal column.
            legal_columns = [i for i, legal in enumerate(self.sense()) if legal]
            if not legal_columns:
                raise Exception("Error: no legal moves available")
            return legal_columns[np.random.randint(low=0, high=len(legal_columns))]
        # Exploit: build the network input (flattened board + turn flag).
        board_state = self._game.get_board().flatten()
        turn_flag = 1 if self._game.get_is_player_one_turn() else -1
        nn_input = np.concatenate((board_state, np.array([turn_flag])))
        q_values = np.asarray(self.nn_pred.feed_forward(nn_input)).flatten()[:7]
        # Actions in descending Q order. kind='stable' keeps the lower column
        # index first on ties, matching the original strict-'>' bubble sort
        # (which this replaces, dropping the O(n^2) pass).
        for action in np.argsort(-q_values, kind='stable'):
            if self._game.is_legal_move(int(action)):
                return int(action)
        raise Exception("Error: no legal moves available")

    def get_nn_pred_q_values(self, nn_input):
        """Return Q-values from the prediction network for nn_input."""
        return self.nn_pred.feed_forward(nn_input)

    def get_nn_target_q_values(self, nn_input):
        """Return Q-values from the (frozen) target network for nn_input."""
        return self.nn_target.feed_forward(nn_input)

    def copy_to_target(self):
        """Sync the target network to the current prediction network.

        Uses deepcopy: the original copy.copy was shallow, which presumably
        left the two networks sharing the underlying weight arrays — TODO
        confirm against NeuralNetwork's internals.
        """
        self.nn_target = copy.deepcopy(self.nn_pred)

    def reinforce(self, lr, delta):
        """Apply one gradient update to the prediction network."""
        self.nn_pred.update_network(lr, delta)