my_custom_player.py (forked from udacity/artificial-intelligence)
import math
import random

import isolation
from sample_players import DataPlayer

# Remnants of an experimental distance-from-centre penalty heuristic
# (see the commented-out code in CustomPlayer.score below):
# from isolation.isolation import _WIDTH, _HEIGHT, _SIZE
# CENTER = (_WIDTH*_HEIGHT-3)/2
# xPenalty = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0.25, 5: 0.5, 6: 1.0}  # index 6 is only needed for testing
# yPenalty = {0: 0, 1: 0, 2: 0, 3: 0.25, 4: 0.5}

# References:
# https://int8.io/monte-carlo-tree-search-beginners-guide/
# http://mcts.ai/about/
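
# The classes below implement the four standard MCTS phases described in the
# references above:
#   selection  -> MonteCarloTreeSearch.tree_policy / IsolationTreeNode.best_uct
#   expansion  -> IsolationTreeNode.expand
#   simulation -> MonteCarloTreeSearch.simulate (uniform-random rollouts)
#   backprop   -> MonteCarloTreeSearch.backpropagate (sign flipped each ply)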
class CustomPlayer(DataPlayer):
""" Implement your own agent to play knight's Isolation
The get_action() method is the only required method for this project.
You can modify the interface for get_action by adding named parameters
with default values, but the function MUST remain compatible with the
default interface.
**********************************************************************
NOTES:
- The test cases will NOT be run on a machine with GPU access, nor be
suitable for using any other machine learning techniques.
- You can pass state forward to your agent on the next turn by assigning
any pickleable object to the self.context attribute.
**********************************************************************
"""
def get_action(self, state):
""" Employ an adversarial search technique to choose an action
available in the current state calls self.queue.put(ACTION) at least
This method must call self.queue.put(ACTION) at least once, and may
call it as many times as you want; the caller will be responsible
for cutting off the function after the search time limit has expired.
See RandomPlayer and GreedyPlayer in sample_players for more examples.
**********************************************************************
NOTE:
- The caller is responsible for cutting off search, so calling
get_action() from your own code will create an infinite loop!
Refer to (and use!) the Isolation.play() function to run games.
**********************************************************************
"""
        if state.ply_count < 2:
            # Opening move: no search needed yet
            self.queue.put(random.choice(state.actions()))
        else:
            # Fail-safe: queue a valid action at least once before searching
            # (the timer is managed by the caller)
            self.queue.put(random.choice(state.actions()))
            if not state.terminal_test():
                # Monte Carlo Tree Search: keep refining the answer until the
                # caller cuts the search off at the time limit
                mcts = MonteCarloTreeSearch(IsolationTreeNode(state), bias_param_const=1.25)
                while True:
                    mcts.run()
                    self.queue.put(mcts.best_action())
            # Alternative: iterative alpha-beta pruning search
            # for depth in range(3, 10):  # the Minimax opponent searches to depth 3, so depth 3 is always reachable
            #     self.queue.put(self.alpha_beta_search(state, depth))
def alpha_beta_search(self, state, depth):
def min_value(state, alpha, beta, depth):
if state.terminal_test(): return state.utility(self.player_id)
if depth <= 0: return self.score(state)
value = float("inf")
for action in state.actions():
value = min(value, max_value(state.result(action), alpha, beta, depth - 1))
if value <= alpha:
return value
beta = min(beta, value)
return value
def max_value(state, alpha, beta, depth):
if state.terminal_test(): return state.utility(self.player_id)
if depth <= 0: return self.score(state)
value = float("-inf")
for action in state.actions():
value = max(value, min_value(state.result(action), alpha, beta, depth - 1))
if value >= beta:
return value
alpha = max(alpha, value)
return value
alpha = float("-inf")
beta = float("inf")
best_value = float("-inf") # This is actually a duplicate of alpha : can be removed
best_action = None
for action in state.actions():
value = min_value(state.result(action), alpha, beta, depth-1)
alpha = max(alpha, value)
if value >= best_value:
best_value = value
best_action = action
return best_action
    def score(self, state):
        """ Heuristic value of a state: own liberties minus opponent liberties. """
        own_loc = state.locs[self.player_id]
        opp_loc = state.locs[1 - self.player_id]
        own_liberties = state.liberties(own_loc)
        opp_liberties = state.liberties(opp_loc)
        # Earlier experiments, kept for reference:
        # intersect = set(own_liberties).intersection(opp_liberties)
        # return len(own_liberties) - len(opp_liberties) + len(intersect)
        # return len(own_liberties) - len(intersect)
        # dy = round((own_loc - 57) / 13)
        # dx = round(own_loc - 13 * dy - 57)
        # distance_penalty = xPenalty[abs(dx)] + yPenalty[abs(dy)]
        return len(own_liberties) - len(opp_liberties)  # - distance_penalty
class IsolationTreeNode():
    """ One node of the Monte Carlo search tree. """
    def __init__(self, state: isolation.Isolation, parent=None):
        self.state = state
        self.parent = parent
        self.children = {}  # maps child node -> action that produced it
        self.untried_actions = self.state.actions()
        self.total_reward = 0
        self.num_visits = 0
    def expand(self):
        """ Expand this node by applying one randomly chosen untried action. """
        next_action = self.untried_actions.pop(random.randrange(len(self.untried_actions)))
        new_child = IsolationTreeNode(self.state.result(next_action), parent=self)
        self.children[new_child] = next_action
        return new_child
    def best_uct(self, bias_param):
        """ Child with the highest UCT score; UCT balances exploitation of
        known-good children against exploration of rarely visited ones. """
        maxUCT = float('-inf')
        bestChild = None
for child in self.children:
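            # UCB1: average reward (exploitation) plus a bias_param-scaled
            # exploration bonus, sqrt(ln(parent visits) / child visits)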
uct = child.total_reward/child.num_visits + bias_param * math.sqrt(math.log(self.num_visits)/child.num_visits)
if uct > maxUCT:
bestChild = child
maxUCT = uct
return bestChild
    def best_action(self):
        """ Most promising action so far: the move leading to the most-visited child. """
        best_child = max(self.children.keys(), key=lambda item: item.num_visits)
        return self.children[best_child]
    def is_terminal(self):
        """ True if this node's state is a terminal (game-over) state. """
        return self.state.terminal_test()
class MonteCarloTreeSearch():
    def __init__(self, root_node: IsolationTreeNode, bias_param_const):
        self.root = root_node
        self.bias_param_const = bias_param_const
    def tree_policy(self):
        """ Choose the next node for simulation: descend through best_uct
        children until reaching a node with untried actions, then expand it. """
        current_node = self.root
        while not current_node.is_terminal():
            if current_node.untried_actions:
                return current_node.expand()
            current_node = current_node.best_uct(bias_param=self.bias_param_const)
        return current_node
    def rollout_policy(self, available_actions):
        """ Rollout policy: choose uniformly at random from the available actions. """
        return random.choice(available_actions)
    def reward_value_for_parent(self, start_node, terminal_state):
        """ Reward as seen by the parent that chose the move into start_node:
        a win for the player to move at start_node is a loss for that parent,
        and vice-versa. """
        # Score the terminal outcome from the perspective of the player to
        # move at start_node, not at the terminal state: a rollout of odd
        # length ends with the other player to move, so using the terminal
        # state's active player would flip the sign of those rewards.
        utility = terminal_state.utility(start_node.state.player())
        if utility == float('inf'):
            return -1.0  # negative reward for a win
        elif utility == float('-inf'):
            return 1.0  # positive reward for a loss
        return 0.0  # draw
    def simulate(self, start_node):
        """ Play one random game (rollout) from start_node to a terminal state. """
        state = start_node.state
        while not state.terminal_test():
            state = state.result(self.rollout_policy(state.actions()))
        return self.reward_value_for_parent(start_node, state)
    def backpropagate(self, node, reward):
        """ Backpropagate the reward up to the root, flipping its sign at each
        ply because the players alternate moves. """
        node.num_visits += 1
        node.total_reward += reward
        if node.parent:
            self.backpropagate(node.parent, -reward)
    def run(self):
        """ Run one MCTS iteration: select/expand, simulate, backpropagate. """
        start_node = self.tree_policy()
        reward = self.simulate(start_node)
        self.backpropagate(start_node, reward)
    def best_action(self):
        """ Best action found by the simulations run so far. """
        # Exploitation-only alternative: self.root.best_uct(bias_param=0)
        return self.root.best_action()
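
# Usage sketch (hypothetical, for quick experimentation outside a timed match;
# it assumes the default Isolation() constructor yields the initial state and
# replaces get_action's external timer with a fixed iteration budget):
#
#     state = isolation.Isolation()
#     mcts = MonteCarloTreeSearch(IsolationTreeNode(state), bias_param_const=1.25)
#     for _ in range(1000):
#         mcts.run()
#     print(mcts.best_action())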