"""QLearning Agent."""

from .learning_agent import ReinforcementAgent
from collections import defaultdict
import random
import chess
from utils import flipCoin
from evaluation import evaluate

class QLearningAgent(ReinforcementAgent):
    """Tabular Q-learning agent for chess.

    Q-values are stored in a defaultdict keyed by ``(board.fen(), move)``,
    so unseen state/action pairs default to 0.0. Black minimizes values,
    white maximizes (evaluate() scores are from white's perspective).
    """

    def __init__(self, **args):
        """Initialize the Q-table and immediately run the training episodes."""
        ReinforcementAgent.__init__(self, **args)
        # Q-table: (FEN string, chess.Move) -> float, defaulting to 0.0.
        self.q_values = defaultdict(float)
        self.train()

    def choose_move(self, board: chess.Board) -> chess.Move:
        """
        Determine best move based on current board state and Q-values.

        Exploitation only — no epsilon-greedy exploration here.

        Args:
            board: The chess board to get the move for.

        Returns:
            chess.Move: The move QLearningAgent decides upon, or None if the
            position is terminal.
        """
        return self.computeActionFromQValues(board)

    def getQValue(self, board: chess.Board, action: chess.Move) -> float:
        """
        Return the Q-value for a given state and action.

        Returns 0.0 if we have never seen the (state, action) pair
        (defaultdict behavior), or the stored Q node value otherwise.

        Args:
            board: The board to get the value for.
            action: The move to take from the state.
        """
        return self.q_values[(board.fen(), action)]

    def computeValueFromQValues(self, board: chess.Board) -> float:
        """
        Return the best attainable Q-value from the given board.

        Minimizes over moves if this agent plays black, maximizes otherwise.
        Terminal positions (or positions with no legal moves) are worth 0.0.

        Args:
            board: The state from which to return the best value.
        """
        if board.is_game_over() or not board.legal_moves:
            return 0.0

        # BUGFIX: look up Q(state, move) on the *current* board. The original
        # pushed the move first, so it read Q(successor, move) while update()
        # wrote Q(state, move) — the learned values were never read back.
        values = [self.getQValue(board, move) for move in board.legal_moves]
        return min(values) if self.color == chess.BLACK else max(values)

    def computeActionFromQValues(self, board: chess.Board) -> chess.Move:
        """
        Compute the best action to take in a state, breaking ties randomly.

        Minimizes if this agent plays black, maximizes otherwise.

        Args:
            board: The state from which to return the best action.

        Returns:
            chess.Move: The chosen move, or None for a terminal position.
        """
        # Evaluate legal moves from this agent's perspective.
        board.turn = self.color
        if board.is_game_over() or not board.legal_moves:
            return None

        bestVal = float('inf') if self.color == chess.BLACK else float('-inf')
        bestMoves = []
        for move in board.legal_moves:
            # BUGFIX: same keying fix as computeValueFromQValues — do not
            # push the move before looking up its Q-value.
            curVal = self.getQValue(board, move)
            if curVal == bestVal:
                bestMoves.append(move)
            elif self.color == chess.BLACK and curVal < bestVal:
                bestVal = curVal
                bestMoves = [move]
            elif self.color == chess.WHITE and curVal > bestVal:
                bestVal = curVal
                bestMoves = [move]

        return random.choice(bestMoves)

    def getAction(self, board: chess.Board) -> chess.Move:
        """
        Compute the action to take in the current state (epsilon-greedy).

        With probability epsilon a uniformly random legal move is explored;
        otherwise the greedy move from the Q-table is exploited.

        Args:
            board: The state from which the best action should be chosen.

        Returns:
            chess.Move: The selected move, or None if there is no legal move.
        """
        board.turn = self.color
        legal_moves = list(board.legal_moves)
        if not legal_moves:
            # ROBUSTNESS: the original crashed in random.choice([]) on a
            # terminal board.
            return None

        if flipCoin(self.epsilon):
            return random.choice(legal_moves)
        return self.computeActionFromQValues(board)

    def final(self, board: chess.Board):
        """
        Called after an episode ends: record the terminal transition.

        Args:
            board: The ending state of the board.
        """
        # Guard: if the agent never acted this episode (e.g. opponent mated
        # immediately), there is no transition to observe.
        if getattr(self, 'lastState', None) is not None:
            deltaReward = evaluate(board) - evaluate(self.lastState)
            self.observeTransition(self.lastState, self.lastAction, board, deltaReward)
        self.stopEpisode()

    def registerInitialState(self, board: chess.Board):
        """Start training bookkeeping for a new episode."""
        self.startEpisode()

    def update(self, board: chess.Board, action: chess.Move, nextBoard: chess.Board, reward: int):
        """
        Perform the Q-learning update for one observed transition.

        state = action => nextState and reward transition:
        Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * V(s')).

        Args:
            board: The current state of the board.
            action: The chosen action.
            nextBoard: The board state after performing the action.
            reward: The reward for the action taken.
        """
        sample = reward + self.discount * self.computeValueFromQValues(nextBoard)
        key = (board.fen(), action)
        self.q_values[key] = (1 - self.alpha) * self.q_values[key] + self.alpha * sample

    def train(self):
        """
        Train the Q-learning agent against an opponent making random moves
        for self.numTraining full games.
        """
        for _ in range(self.numTraining):
            board = chess.Board()
            self.startEpisode()
            while not board.is_game_over():
                if board.turn == self.color:
                    state = board.copy()
                    action = self.getAction(state)
                    if action is None:
                        # No legal move despite not game-over (defensive).
                        break
                    self.doAction(state, action)
                    board.push(action)
                    # ROBUSTNESS: copy the successor so later opponent pushes
                    # cannot mutate a stored reference to this state.
                    nextState = board.copy()
                    reward = evaluate(nextState) - evaluate(state)
                    self.observeTransition(state, action, nextState, reward)
                else:
                    opp_moves = list(board.legal_moves)
                    if opp_moves:
                        board.push(random.choice(opp_moves))
            self.final(board)