initial commit for AI opponent using ISMCTS

2016-05-16 16:55:40 +02:00
parent a6f2c07403
commit 8343737cb4
8 changed files with 500 additions and 34 deletions
--- a/ISMCTS.py
+++ b/ISMCTS.py
@ -0,0 +1,332 @@
+# This is a very simple Python 2.7 implementation of the Information Set Monte Carlo Tree Search algorithm.
+# The function ISMCTS(rootstate, itermax, verbose = False) is towards the bottom of the code.
+# It aims to have the clearest and simplest possible code, and for the sake of clarity, the code
+# is orders of magnitude less efficient than it could be made, particularly by using a 
+# state.GetRandomMove() or state.DoRandomRollout() function.
+# 
+# An example GameState classes for Knockout Whist is included to give some idea of how you
+# can write your own GameState to use ISMCTS in your hidden information game.
+# 
+# Written by Peter Cowling, Edward Powley, Daniel Whitehouse (University of York, UK) September 2012 - August 2013.
+# 
+# Licence is granted to freely use and distribute for any sensible/legal purpose so long as this comment
+# remains in any distributed code.
+# 
+# For more information about Monte Carlo Tree Search check out our web site at www.mcts.ai
+# Also read the article accompanying this code at ***URL HERE***
+
+from math import *
+import random, sys
+from game import Game as UNOGame
+from player import Player as UNOPlayer
+from utils import list_subtract_unsorted
+import card as c
+
+
+class GameState:
+    """ A state of the game, i.e. the game board. These are the only functions which are
+        absolutely necessary to implement ISMCTS in any imperfect information game,
+        although they could be enhanced and made quicker, for example by using a
+        GetRandomMove() function to generate a random move during rollout.
+        By convention the players are numbered 1, 2, ..., self.numberOfPlayers.
+    """
+
+    def __init__(self):
+        pass
+
+    def GetNextPlayer(self, p):
+        """ Return the player to the left of the specified player
+        """
+        raise NotImplementedError()
+
+    def Clone(self):
+        """ Create a deep clone of this game state.
+        """
+        raise NotImplementedError()
+
+    def CloneAndRandomize(self, observer):
+        """ Create a deep clone of this game state, randomizing any information not visible to the specified observer player.
+        """
+        raise NotImplementedError()
+
+    def DoMove(self, move):
+        """ Update a state by carrying out the given move.
+            Must update playerToMove.
+        """
+        raise NotImplementedError()
+
+    def GetMoves(self):
+        """ Get all possible moves from this state.
+        """
+        raise NotImplementedError()
+
+    def GetResult(self, player):
+        """ Get the game result from the viewpoint of player.
+        """
+        raise NotImplementedError()
+
+    def __repr__(self):
+        """ Don't need this - but good style.
+        """
+        pass
+
+
+class UNOState(GameState):
+    """ A state of the game UNO.
+    """
+
+    def __init__(self, game):
+        """ Initialise the game state. n is the number of players (from 2 to 7).
+        """
+        self.game = game
+
+    @property
+    def playerToMove(self):
+        return self.game.current_player
+
+    @property
+    def numberOfPlayers(self):
+        return len(self.game.players)
+
+    def CloneAndRandomize(self, observer):
+        """ Create a deep clone of this game state.
+        """
+        game = UNOGame(None)
+        game.deck.cards.append(game.last_card)
+        game.draw_counter = self.game.draw_counter
+
+        game.last_card = self.game.last_card
+
+        game.deck.cards = list_subtract_unsorted(game.deck.cards,
+                                                 self.game.deck.graveyard)
+        game.deck.graveyard = list(self.game.deck.graveyard)
+
+        for player in self.game.players:
+            p = UNOPlayer(game, None)
+            if player is observer:
+                p.cards = list(player.cards)
+            else:
+                for i in range(len(player.cards)):
+                    p.cards.append(game.deck.draw())
+
+        return UNOState(game)
+
+    def DoMove(self, move):
+        """ Update a state by carrying out the given move.
+            Must update playerToMove.
+        """
+        if move == 'draw':
+            for n in range(self.game.draw_counter or 1):
+                self.game.current_player.cards.append(
+                    self.game.deck.draw()
+                )
+
+            self.game.draw_counter = 0
+            self.game.turn()
+        else:
+            self.game.current_player.cards.remove(move)
+
+            self.game.play_card(move)
+            if move.special:
+                self.game.turn()
+                self.game.choosing_color = False
+
+    def GetMoves(self):
+        """ Get all possible moves from this state.
+        """
+        if self.game.current_player.cards:
+            playable = self.game.current_player.playable_cards()
+            playable_converted = list()
+            for card in playable:
+                if not card.color:
+                    for color in c.COLORS:
+                        playable_converted.append(
+                            c.Card(color, None, card.special)
+                        )
+                else:
+                    playable_converted.append(card)
+
+            # playable_converted.append('draw')
+            return playable_converted or ['draw']
+        else:
+            return list()
+
+    def GetResult(self, player):
+        """ Get the game result from the viewpoint of player.
+        """
+        return 1 if not player.cards else 0
+
+    def __repr__(self):
+        """ Return a human-readable representation of the state
+        """
+        return '\n'.join(
+            ['%s: %s' % (p.user, [str(c) for c in p.cards])
+             for p in self.game.players]
+        ) + "\nDeck: %s" % str([str(crd) for crd in self.game.deck.cards]) \
+          + "\nGrav: %s" % str([str(crd) for crd in self.game.deck.graveyard])
+
+
+class Node:
+    """ A node in the game tree. Note wins is always from the viewpoint of playerJustMoved.
+    """
+
+    def __init__(self, move=None, parent=None, playerJustMoved=None):
+        self.move = move  # the move that got us to this node - "None" for the root node
+        self.parentNode = parent  # "None" for the root node
+        self.childNodes = []
+        self.wins = 0
+        self.visits = 0
+        self.avails = 1
+        self.playerJustMoved = playerJustMoved  # the only part of the state that the Node needs later
+
+    def GetUntriedMoves(self, legalMoves):
+        """ Return the elements of legalMoves for which this node does not have children.
+        """
+
+        # Find all moves for which this node *does* have children
+        triedMoves = [child.move for child in self.childNodes]
+
+        # Return all moves that are legal but have not been tried yet
+        return [move for move in legalMoves if move not in triedMoves]
+
+    def UCBSelectChild(self, legalMoves, exploration=0.7):
+        """ Use the UCB1 formula to select a child node, filtered by the given list of legal moves.
+            exploration is a constant balancing between exploitation and exploration, with default value 0.7 (approximately sqrt(2) / 2)
+        """
+
+        # Filter the list of children by the list of legal moves
+        legalChildren = [child for child in self.childNodes if
+                         child.move in legalMoves]
+
+        # Get the child with the highest UCB score
+        s = max(legalChildren, key=lambda c: float(c.wins) / float(
+            c.visits) + exploration * sqrt(log(c.avails) / float(c.visits)))
+
+        # Update availability counts -- it is easier to do this now than during backpropagation
+        for child in legalChildren:
+            child.avails += 1
+
+        # Return the child selected above
+        return s
+
+    def AddChild(self, m, p):
+        """ Add a new child node for the move m.
+            Return the added child node
+        """
+        n = Node(move=m, parent=self, playerJustMoved=p)
+        self.childNodes.append(n)
+        return n
+
+    def Update(self, terminalState):
+        """ Update this node - increment the visit count by one, and increase the win count by the result of terminalState for self.playerJustMoved.
+        """
+        self.visits += 1
+        if self.playerJustMoved is not None:
+            self.wins += terminalState.GetResult(self.playerJustMoved)
+
+    def __repr__(self):
+        return "[M:%s W/V/A: %4i/%4i/%4i]" % (
+        self.move, self.wins, self.visits, self.avails)
+
+    def TreeToString(self, indent):
+        """ Represent the tree as a string, for debugging purposes.
+        """
+        s = self.IndentString(indent) + str(self)
+        for c in self.childNodes:
+            s += c.TreeToString(indent + 1)
+        return s
+
+    def IndentString(self, indent):
+        s = "\n"
+        for i in range(1, indent + 1):
+            s += "| "
+        return s
+
+    def ChildrenToString(self):
+        s = ""
+        for c in self.childNodes:
+            s += str(c) + "\n"
+        return s
+
+
+def ISMCTS(rootstate, itermax, verbose=False):
+    """ Conduct an ISMCTS search for itermax iterations starting from rootstate.
+        Return the best move from the rootstate.
+    """
+
+    rootnode = Node()
+
+    for i in range(itermax):
+        node = rootnode
+
+        # Determinize
+        state = rootstate.CloneAndRandomize(rootstate.playerToMove)
+
+        # Select
+        while state.GetMoves() != [] and node.GetUntriedMoves(
+                state.GetMoves()) == []:  # node is fully expanded and non-terminal
+            node = node.UCBSelectChild(state.GetMoves())
+            state.DoMove(node.move)
+
+        # Expand
+        untriedMoves = node.GetUntriedMoves(state.GetMoves())
+        if untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
+            m = random.choice(untriedMoves)
+            player = state.playerToMove
+            state.DoMove(m)
+            node = node.AddChild(m, player)  # add child and descend tree
+
+        # Simulate
+        while state.GetMoves() != []:  # while state is non-terminal
+            state.DoMove(random.choice(state.GetMoves()))
+
+        # Backpropagate
+        while node != None:  # backpropagate from the expanded node and work back to the root node
+            node.Update(state)
+            node = node.parentNode
+
+    # Output some information about the tree - can be omitted
+    if (verbose):
+        print(rootnode.TreeToString(0))
+    else:
+        print(rootnode.ChildrenToString())
+
+    return max(rootnode.childNodes, key=lambda
+        c: c.visits).move  # return the move that was most visited
+
+
+def PlayGame():
+    """ Play a sample game between two ISMCTS players.
+        *** This is only a demo and not used by the actual bot ***
+    """
+    game = UNOGame(None)
+    me = UNOPlayer(game, "Player 1")
+    UNOPlayer(game, "Player 2")
+    UNOPlayer(game, "Player 3")
+    UNOPlayer(game, "Player 4")
+    UNOPlayer(game, "Player 5")
+
+    state = UNOState(game)
+
+    while (state.GetMoves() != []):
+        print(str(state))
+        # Use different numbers of iterations (simulations, tree nodes) for different players
+        m = ISMCTS(rootstate=state, itermax=10, verbose=False)
+        # if state.playerToMove is me:
+        #     m = ISMCTS(rootstate=state, itermax=1000, verbose=False)
+        # else:
+        #     m = ISMCTS(rootstate=state, itermax=100, verbose=False)
+        print("Best Move: " + str(m) + "\n")
+        state.DoMove(m)
+
+    someoneWon = False
+    for p in game.players:
+        if state.GetResult(p) > 0:
+            print("Player " + str(p) + " wins!")
+            someoneWon = True
+    if not someoneWon:
+        print("Nobody wins!")
+
+
+if __name__ == "__main__":
+    PlayGame()