Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion open_spiel/python/algorithms/psro_v2/psro_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,10 @@ def __init__(self,

# Alpharank is a special case here, as it's not supported by the abstract
# meta trainer api, so has to be passed as a function instead of a string.
if not meta_strategy_method or meta_strategy_method == "alpharank":

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should catch the case when meta_strategy_method is None (or not provided) and make it default to alpharank to ensure backwards compatability

if meta_strategy_method == "alpharank":
meta_strategy_method = utils.alpharank_strategy
if meta_strategy_method == "ssd":
meta_strategy_method = utils.ssd_strategy

print("Sampling from marginals : {}".format(sample_from_marginals))
self.sample_from_marginals = sample_from_marginals
Expand Down
63 changes: 60 additions & 3 deletions open_spiel/python/algorithms/psro_v2/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from open_spiel.python.algorithms import policy_aggregator_joint
from open_spiel.python.egt import alpharank
from open_spiel.python.egt import utils as alpharank_utils
from open_spiel.python.egt import ssd


def empty_list_generator(number_dimensions):
Expand Down Expand Up @@ -221,7 +222,6 @@ def get_joint_strategy_from_marginals(probabilities):
probas.append(np.array(probabilities[i]).reshape(probas_shapes))
return np.prod(probas)


def alpharank_strategy(solver, return_joint=False, **unused_kwargs):
"""Returns AlphaRank distribution on meta game matrix.

Expand All @@ -241,12 +241,69 @@ def alpharank_strategy(solver, return_joint=False, **unused_kwargs):
meta_games = solver.get_meta_game()
meta_games = [np.asarray(x) for x in meta_games]

solver_kwargs = solver.get_kwargs()
alpharank_kwargs = {}
alpharank_kwargs["use_sparse"] = (solver_kwargs["use_sparse"]
if "use_sparse" in solver_kwargs
else False)

if solver.symmetric_game:
meta_games = [meta_games[0]]

# Get alpharank distribution via alpha-sweep
joint_distr = alpharank.sweep_pi_vs_epsilon(
meta_games)
meta_games, **alpharank_kwargs)
joint_distr = remove_epsilon_negative_probs(joint_distr)

marginals = 2 * [joint_distr]
joint_distr = get_joint_strategy_from_marginals(marginals)
if return_joint:
return marginals, joint_distr
else:
return joint_distr

else:
joint_distr = alpharank.sweep_pi_vs_epsilon(meta_games,
**alpharank_kwargs)
joint_distr = remove_epsilon_negative_probs(joint_distr)

if return_joint:
marginals = get_alpharank_marginals(meta_games, joint_distr)
return marginals, joint_distr
else:
return joint_distr

def ssd_strategy(solver, return_joint=False, **unused_kwargs):
"""Returns SSD distribution on meta game matrix.

Args:
solver: GenPSROSolver instance.
return_joint: a boolean specifying whether to return player-wise
marginals.

Returns:
marginals: a list, specifying for each player the alpharank marginal
distributions on their strategies.
joint_distr: a list, specifying the joint alpharank distributions for all
strategy profiles.
"""
meta_games = solver.get_meta_game()
meta_games = [np.asarray(x) for x in meta_games]

solver_kwargs = solver.get_kwargs() if hasattr(solver, "get_kwargs") else {}
ssd_kwargs = {}
if isinstance(solver_kwargs, dict):
for key, value in solver_kwargs.items():
if value is not None:
ssd_kwargs[key] = value

if solver.symmetric_game:
meta_games = [meta_games[0]]

# alpharank distribution via alpha-sweep
# joint_distr = alpharank.sweep_pi_vs_epsilon(
# meta_games)
joint_distr = ssd.compute_ssd(meta_games, **ssd_kwargs)
joint_distr = remove_epsilon_negative_probs(joint_distr)

marginals = 2 * [joint_distr]
Expand All @@ -257,7 +314,7 @@ def alpharank_strategy(solver, return_joint=False, **unused_kwargs):
return joint_distr

else:
joint_distr = alpharank.sweep_pi_vs_epsilon(meta_games)
joint_distr = ssd.compute_ssd(meta_games, **ssd_kwargs)
joint_distr = remove_epsilon_negative_probs(joint_distr)

if return_joint:
Expand Down
Loading
Loading