In [1]:
%load_ext autoreload
%autoreload 2

import sys

# Put the parent directory (relative to the current working directory) at the
# front of the module search path so the `src` package can be imported.
sys.path.insert( 0, '..' )

In [2]:
import numpy as np

from src.game import ECardState
# Display the docstring of ECardState; per the recorded output it summarises the rules of the card game.
print( ECardState.__doc__ )


    Each player has 5 cards, 4 Citizens and either Slave or Emperor.

    The Citizen card represents the common man, and cannot defeat the Emperor who sits at the top. It can, however, defeat the Slave, who resides at the very bottom of the system. Two citizen against each other results in a tie.

    The Emperor represents the one at the top of society. This card can defeat the citizen, but will lose to the Slave.

    The Slave is presented as the one at the very bottom of society. Seeing as how it has nothing to lose, it can overthrow the Emperor in one last attempt at revenge.

    Winning as the emperor grants you one point, while winning as the slave grants you three.
    


In [3]:
from src.cfr import CFRPlusTrainer
# Show the trainer's own one-line description before running it.
print( CFRPlusTrainer.__doc__ )

# Training budget and how often (in iterations) progress is logged.
N_TRAIN_ITERATIONS = 10000
LOG_INTERVAL = 250

# Train from the game's initial state, logging the EV as training proceeds.
trainer = CFRPlusTrainer( ECardState.initial_state() )
trainer.train( N_TRAIN_ITERATIONS, verbose=True, log_interval=LOG_INTERVAL )

# Keep the resulting strategy for the exploitability check further down.
ai_strategy = trainer.strategy()


    Uses CFR+ to find approximate Nash Equilibrium
    
Iteration 250, EV = 0.2034339750958311
Iteration 500, EV = 0.20161628550786065
Iteration 750, EV = 0.20110521644402857
Iteration 1000, EV = 0.2008078472830618
Iteration 1250, EV = 0.2006315799499673
Iteration 1500, EV = 0.2005292388790681
Iteration 1750, EV = 0.20046404517052008
Iteration 2000, EV = 0.2004034440299585
Iteration 2250, EV = 0.20036379533259155
Iteration 2500, EV = 0.20031878946071932
Iteration 2750, EV = 0.2002966187802057
Iteration 3000, EV = 0.20027090659790944
Iteration 3250, EV = 0.20025124588496196
Iteration 3500, EV = 0.20022675686616145
Iteration 3750, EV = 0.20021031740551837
Iteration 4000, EV = 0.20020078021941273
Iteration 4250, EV = 0.2001850489360304
Iteration 4500, EV = 0.2001782469330473
Iteration 4750, EV = 0.2001713999122966
Iteration 5000, EV = 0.2001602977045031
Iteration 5250, EV = 0.20015273149214224
Iteration 5500, EV = 0.200145226082911
Iteration 5750, EV = 0.200137005424081
Iteration 6000, E

In [4]:
# Normalise each infoset's entry of `trainer.strategy_sum` so it sums to 1,
# then display the resulting probabilities rounded to 2 decimal places.
{
    infoset: ( weights / sum( weights ) ).round( 2 )
    for infoset, weights in trainer.strategy_sum.items()
}

{'': array([0.2, 0.8]),
 ':': array([0.2, 0.8]),
 'c': array([0.25, 0.75]),
 'c:': array([0.25, 0.75]),
 'cc': array([0.33, 0.67]),
 'cc:': array([0.34, 0.66]),
 'ccc': array([0.5, 0.5]),
 'ccc:': array([0.5, 0.5]),
 'cccc': array([1.]),
 'cccc:': array([1.])}

In [5]:
# Compute exploitability of strategy found using CFR+

from src.cfr import Exploitability

# Evaluate `ai_strategy` (the strategy returned by the CFR+ trainer) starting from the initial game state.
exploit_calc = Exploitability( ECardState.initial_state(), ai_strategy )
# NOTE(review): 10000 is presumably an iteration count for the evaluation — confirm against Exploitability.compute.
exploit_calc.compute( 10000 )

# Display the results; in the recorded output 'exploitability' equals ev1 + ev2.
exploit_calc.get_ev()

{'ev1': 0.21307266409977618,
 'ev2': -0.20456958822021562,
 'exploitability': 0.008503075879560562}

In [6]:
# Compute exploitability of strategy that always plays Slave/Emperor card

def always_play_es_card( infostate ):
    """
    Baseline strategy that puts all probability on the first action.

    Only the length of `infostate[1]` is used; it determines how many
    entries the returned vector has.
    NOTE(review): this presumably is the list of legal actions, with the
    Slave/Emperor card at index 0 - confirm against the game implementation.

    Returns a float numpy array with 1 at index 0 and 0 everywhere else.
    Raises IndexError if `infostate[1]` is empty.
    """
    n_actions = len( infostate[1] )
    strategy = np.zeros( n_actions )
    strategy[0] = 1
    return strategy

# Evaluate the fixed `always_play_es_card` policy from the initial game state, for comparison with the trained strategy.
exploit_calc = Exploitability( ECardState.initial_state(), always_play_es_card )
# NOTE(review): 10000 is presumably an iteration count for the evaluation — confirm against Exploitability.compute.
exploit_calc.compute( 10000 )

# Display the results; in the recorded output ev1 is ~1, ev2 is ~3 and 'exploitability' is their sum (~4).
exploit_calc.get_ev()

{'ev1': 0.9999999999940009,
 'ev2': 2.999999999997,
 'exploitability': 3.999999999991001}