In [1]:
%load_ext autoreload
%autoreload 2

import sys; sys.path.insert( 0, '..' )

In [2]:
# numpy is used further down in the notebook to sample the AI's action.
import numpy as np

# Leduc poker game state, provided by the project-local `src` package.
from src.game import LeducPokerState
# Print the class docstring so the rules of the game show up in the cell output.
print( LeducPokerState.__doc__ )


    Leduc Holdem is played as follows:

    The deck consists of (J, J, Q, Q, K, K).
    Each player gets 1 card.
    There are two betting rounds, and the total number of raises in each round is at most 2.
    In the second round, one card is revealed on the table and this is used to create a hand.
    There are two types of hands: pair and highest card.
    There are three moves: call, raise, and fold.
    Each of the two players antes 1.
    In the first round, the betting amount is 2 (including the ante for the first bet). In the second round, it is 4.

    This gives a total of 144 information states per player. (288 info states in total)
    


In [3]:
# Fit a strategy for Leduc poker with Discounted CFR.

from src.cfr import DiscountedCFRTrainer

# Show the trainer's own description ahead of the training log.
print( DiscountedCFRTrainer.__doc__ )

training_root = LeducPokerState.initial_state()
# With these options the run reports its EV every 250 iterations (see the cell output).
train_options = dict( verbose=True, log_interval=250 )

# `trainer` is read again by later cells, so it stays a notebook-level name.
trainer = DiscountedCFRTrainer( training_root )
trainer.train( 1500, **train_options )


    Uses Discounted CFR to find approximate Nash Equilibrium
    
Iteration 250, EV = -0.08792733679193222
Iteration 500, EV = -0.08672640322080394
Iteration 750, EV = -0.0862916566835874
Iteration 1000, EV = -0.0861342918562822
Iteration 1250, EV = -0.0860209586575692
Iteration 1500, EV = -0.0859431412103651


In [4]:
# Evaluate the exploitability of the strategy produced by `trainer`.
from src.cfr import Exploitability

evaluation_root = LeducPokerState.initial_state()
trained_strategy = trainer.strategy()

exploit_calc = Exploitability( evaluation_root, trained_strategy )
exploit_calc.compute()

# Bare last expression so the notebook displays the value as the cell result.
ev_report = exploit_calc.get_ev()
ev_report["exploitability"]

0.06340735618084371

In [16]:
# Play one hand of Leduc poker against the trained AI; the human is player 0.

HUMAN_PLAYER = 0
AI_PLAYER = 1

# Bind the AI policy inside this cell so it does not rely on a name left over from a
# deleted or out-of-order cell (the notebook must survive Restart & Run All).
# NOTE(review): assumes trainer.strategy() returns a callable that maps an infoset to
# a probability vector over its available actions, which is how `ai_strategy` is used
# below — confirm against src.cfr.
ai_strategy = trainer.strategy()

state = LeducPokerState.initial_state()
while not state.is_terminal():
    if state.is_chance():
        # Chance node: advance without asking either player for an action.
        state = state.next_state()
        continue

    # infoset[0] is the info-state label shown to the player,
    # infoset[1] is the list of actions available at this decision point.
    infoset = state.get_infoset()
    info_key, legal_actions = infoset[0], infoset[1]

    if state.get_player() == AI_PLAYER:
        action_probs = ai_strategy( infoset )
        sampled_action = np.random.choice( legal_actions, p=action_probs )
        print( "AI action:", sampled_action )
        state = state.next_state( sampled_action )
    else:
        print( "Your infostate:", info_key )
        print( "Available actions:", legal_actions )
        action = input( "Player's action:" ).strip()
        # Re-prompt on typos instead of handing an unknown action to the game state.
        while action not in legal_actions:
            print( "Invalid action, choose one of:", legal_actions )
            action = input( "Player's action:" ).strip()
        state = state.next_state( action )

print( "End of game, you win", state.get_payoff( HUMAN_PLAYER ) )
print( "Opponent's card was", state.hole_cards[AI_PLAYER] )

Your infostate: J::
Available actions: ['c', 'b']
Player's action:c
AI action: c
Your infostate: JQ:cc:
Available actions: ['c', 'b']
Player's action:c
AI action: b
Your infostate: JQ:cc:cb
Available actions: ['f', 'c', 'b']
Player's action:f
End of game, you win -1
Opponent's card was Q
