from collections import deque
from Arena import Arena
from MCTS import MCTS
import numpy as np
from pytorch_classification.utils import Bar, AverageMeter
import time, os, sys
from pickle import Pickler, Unpickler
from random import shuffle


class Coach():
    """
    This class executes the self-play + learning. It uses the functions defined
    in Game and NeuralNet. args are specified in main.py.
    """
    def __init__(self, game, nnet, args):
        self.game = game
        self.nnet = nnet
        self.pnet = self.nnet.__class__(self.game)  # the competitor network
        self.args = args
        self.mcts = MCTS(self.game, self.nnet, self.args)
        self.trainExamplesHistory = []    # history of examples from args.numItersForTrainExamplesHistory latest iterations
        self.skipFirstSelfPlay = False    # can be overridden in loadTrainExamples()
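        # The args object (specified in main.py) is expected to provide at least the
        # fields referenced in this class: numIters, numEps, tempThreshold,
        # maxlenOfQueue, numItersForTrainExamplesHistory, arenaCompare, checkpoint,
        # and load_folder_file.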

    def executeEpisode(self):
        """
        This function executes one episode of self-play, starting with player 1.
        As the game is played, each turn is added as a training example to
        trainExamples. The game is played until it ends; the outcome is then used
        to assign values to each example in trainExamples.

        It uses temp=1 if episodeStep < tempThreshold, and thereafter uses temp=0.

        Returns:
            trainExamples: a list of examples of the form (canonicalBoard, pi, v)
                           where pi is the MCTS-informed policy vector and v is +1
                           if the player eventually won the game, else -1.
        """
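        # Intermediate examples are collected as [board, curPlayer, pi, None]; the
        # value slot only becomes known (and is attached in the returned tuples)
        # once the game result r is available at the end of the episode.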
        trainExamples = []
        board, vis_state = self.game.getInitBoard()
        self.curPlayer = 1
        episodeStep = 0

        while True:
            episodeStep += 1
            canonicalBoard = self.game.getCanonicalForm(board, self.curPlayer)
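            # temperature schedule: sample moves in proportion to MCTS visit counts
            # (temp=1) while episodeStep < tempThreshold, then play greedily (temp=0)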
            temp = int(episodeStep < self.args.tempThreshold)

            pi = self.mcts.getActionProb(canonicalBoard, temp=temp)
            sym = self.game.getSymmetries(canonicalBoard, pi)
            for b, p in sym:
                trainExamples.append([b[0], self.curPlayer, p, None])

            action = np.random.choice(len(pi), p=pi)
            board, self.curPlayer, vis_state = self.game.getNextState(board, self.curPlayer, action)

            r = self.game.getGameEnded(board, self.curPlayer)

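            # A non-zero result from getGameEnded means the episode is over. The
            # perspective-flipping value assignment is left commented out below; the
            # active return stores the raw result r for every collected example.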
            if r != 0:
                # return [(x[0], x[2], r*((-1)**(x[1] != self.curPlayer))) for x in trainExamples]
                return [(x[0], x[2], r) for x in trainExamples]

    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    self.mcts = MCTS(self.game, self.nnet, self.args)   # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                        total=bar.elapsed_td, eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
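            # NOTE: the updateThreshold acceptance test below is disabled (if False),
            # so the newly trained network is always accepted; restore the commented-out
            # condition to reject models that do not win often enough in the arena.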
            # if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
            if False:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')

    def getCheckpointFile(self, iteration):
        return 'checkpoint_' + str(iteration) + '.pth.tar'

    def saveTrainExamples(self, iteration):
        folder = self.args.checkpoint
        if not os.path.exists(folder):
            os.makedirs(folder)
        filename = os.path.join(folder, self.getCheckpointFile(iteration) + ".examples")
        with open(filename, "wb+") as f:
            Pickler(f).dump(self.trainExamplesHistory)

    def loadTrainExamples(self):
        modelFile = os.path.join(self.args.load_folder_file[0], self.args.load_folder_file[1])
        examplesFile = modelFile + ".examples"
        if not os.path.isfile(examplesFile):
            print(examplesFile)
            r = input("File with trainExamples not found. Continue? [y|n]")
            if r != "y":
                sys.exit()
        else:
            print("File with trainExamples found. Reading it.")
            with open(examplesFile, "rb") as f:
                self.trainExamplesHistory = Unpickler(f).load()
            # examples based on the model were already collected (loaded)
            self.skipFirstSelfPlay = True
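
# Typical driver flow (a minimal sketch, not part of this module): the concrete Game
# and NeuralNet implementations and the args object come from main.py; MyGame and
# NNetWrapper below are placeholder names, substitute the implementations main.py uses.
#
#     game = MyGame()
#     nnet = NNetWrapper(game)
#     coach = Coach(game, nnet, args)
#     # coach.loadTrainExamples()   # only when resuming from previously saved examples
#     coach.learn()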