diff --git a/open_spiel/python/algorithms/psro_v2/psro_v2.py b/open_spiel/python/algorithms/psro_v2/psro_v2.py index 7ae2fa10c9..3cc11def02 100644 --- a/open_spiel/python/algorithms/psro_v2/psro_v2.py +++ b/open_spiel/python/algorithms/psro_v2/psro_v2.py @@ -154,8 +154,10 @@ def __init__(self, # Alpharank is a special case here, as it's not supported by the abstract # meta trainer api, so has to be passed as a function instead of a string. - if not meta_strategy_method or meta_strategy_method == "alpharank": + if meta_strategy_method == "alpharank": meta_strategy_method = utils.alpharank_strategy + if meta_strategy_method == "ssd": + meta_strategy_method = utils.ssd_strategy print("Sampling from marginals : {}".format(sample_from_marginals)) self.sample_from_marginals = sample_from_marginals diff --git a/open_spiel/python/algorithms/psro_v2/utils.py b/open_spiel/python/algorithms/psro_v2/utils.py index 1d81f2dc66..2f031e4718 100644 --- a/open_spiel/python/algorithms/psro_v2/utils.py +++ b/open_spiel/python/algorithms/psro_v2/utils.py @@ -22,6 +22,7 @@ from open_spiel.python.algorithms import policy_aggregator_joint from open_spiel.python.egt import alpharank from open_spiel.python.egt import utils as alpharank_utils +from open_spiel.python.egt import ssd def empty_list_generator(number_dimensions): @@ -221,7 +222,6 @@ def get_joint_strategy_from_marginals(probabilities): probas.append(np.array(probabilities[i]).reshape(probas_shapes)) return np.prod(probas) - def alpharank_strategy(solver, return_joint=False, **unused_kwargs): """Returns AlphaRank distribution on meta game matrix. 
@@ -241,12 +241,69 @@ def alpharank_strategy(solver, return_joint=False, **unused_kwargs): meta_games = solver.get_meta_game() meta_games = [np.asarray(x) for x in meta_games] + solver_kwargs = solver.get_kwargs() + alpharank_kwargs = {} + alpharank_kwargs["use_sparse"] = (solver_kwargs["use_sparse"] + if "use_sparse" in solver_kwargs + else False) + if solver.symmetric_game: meta_games = [meta_games[0]] # Get alpharank distribution via alpha-sweep joint_distr = alpharank.sweep_pi_vs_epsilon( - meta_games) + meta_games, **alpharank_kwargs) + joint_distr = remove_epsilon_negative_probs(joint_distr) + + marginals = 2 * [joint_distr] + joint_distr = get_joint_strategy_from_marginals(marginals) + if return_joint: + return marginals, joint_distr + else: + return joint_distr + + else: + joint_distr = alpharank.sweep_pi_vs_epsilon(meta_games, + **alpharank_kwargs) + joint_distr = remove_epsilon_negative_probs(joint_distr) + + if return_joint: + marginals = get_alpharank_marginals(meta_games, joint_distr) + return marginals, joint_distr + else: + return joint_distr + +def ssd_strategy(solver, return_joint=False, **unused_kwargs): + """Returns SSD distribution on meta game matrix. + + Args: + solver: GenPSROSolver instance. + return_joint: a boolean specifying whether to return player-wise + marginals. + + Returns: + marginals: a list, specifying for each player the alpharank marginal + distributions on their strategies. + joint_distr: a list, specifying the joint alpharank distributions for all + strategy profiles. 
+ """ + meta_games = solver.get_meta_game() + meta_games = [np.asarray(x) for x in meta_games] + + solver_kwargs = solver.get_kwargs() if hasattr(solver, "get_kwargs") else {} + ssd_kwargs = {} + if isinstance(solver_kwargs, dict): + for key, value in solver_kwargs.items(): + if value is not None: + ssd_kwargs[key] = value + + if solver.symmetric_game: + meta_games = [meta_games[0]] + + # alpharank distribution via alpha-sweep + # joint_distr = alpharank.sweep_pi_vs_epsilon( + # meta_games) + joint_distr = ssd.compute_ssd(meta_games, **ssd_kwargs) joint_distr = remove_epsilon_negative_probs(joint_distr) marginals = 2 * [joint_distr] @@ -257,7 +314,7 @@ def alpharank_strategy(solver, return_joint=False, **unused_kwargs): return joint_distr else: - joint_distr = alpharank.sweep_pi_vs_epsilon(meta_games) + joint_distr = ssd.compute_ssd(meta_games, **ssd_kwargs) joint_distr = remove_epsilon_negative_probs(joint_distr) if return_joint: diff --git a/open_spiel/python/egt/alpharank.py b/open_spiel/python/egt/alpharank.py index 1dc78c00ab..9670c912f2 100644 --- a/open_spiel/python/egt/alpharank.py +++ b/open_spiel/python/egt/alpharank.py @@ -25,11 +25,30 @@ import numpy as np import scipy.linalg as la +import scipy.sparse as sps +import scipy.sparse.linalg as spla from open_spiel.python.egt import alpharank_visualizer from open_spiel.python.egt import utils +def _log_matrix_sparsity(matrix, label): + """Print sparsity stats for the provided matrix.""" + if matrix is None: + return + total = matrix.shape[0] * matrix.shape[1] + if not total: + density = 0.0 + nnz = 0 + elif sps.isspmatrix(matrix): + nnz = matrix.nnz + density = (nnz / total) * 100.0 + else: + nnz = np.count_nonzero(matrix) + density = (nnz / total) * 100.0 + print(f"{label}: {nnz}/{total} non-zero entries ({density:.2f}% density).") + + def _get_payoff(payoff_table_k, payoffs_are_hpt_format, strat_profile, k=None): """Gets the payoff of the k-th agent in a single or multi-population game. 
@@ -153,7 +172,12 @@ def _get_rho_sr(payoff_table, assert payoff_sum is not None u = alpha * m / (m - 1) * (payoff_rs - payoff_sum / 2) - if np.isclose(u, 0, atol=1e-14): + if ( + np.isclose(u, 0, atol=1e-14) or + np.isclose((1 - np.exp(-m * u)), 0, atol=1e-5) or + (1 - np.exp(-m * u)) == float('inf') or + (1 - np.exp(-m * u)) == float('-inf') + ): # To avoid divide by 0, use first-order approximation when u is near 0 result = 1 / m else: @@ -231,7 +255,12 @@ def _get_rho_sr_multipop(payoff_table_k, if use_fast_compute: u = alpha * (f_r - f_s) - if np.isclose(u, 0, atol=1e-14): + if ( + np.isclose(u, 0, atol=1e-14) or + np.isclose((1 - np.exp(-m * u)), 0, atol=1e-5) or + (1 - np.exp(-m * u)) == float('inf') or + (1 - np.exp(-m * u)) == float('-inf') + ): # To avoid divide by 0, use first-order approximation when u is near 0 result = 1 / m else: @@ -256,7 +285,8 @@ def _get_singlepop_transition_matrix(payoff_table, use_local_selection_model, payoff_sum, use_inf_alpha=False, - inf_alpha_eps=0.1): + inf_alpha_eps=0.1, + use_sparse=False): """Gets the Markov transition matrix for a single-population game. 
Args: @@ -282,11 +312,15 @@ def _get_singlepop_transition_matrix(payoff_table, [payoff_table], payoffs_are_hpt_format) num_strats = num_strats_per_population[0] - c = np.zeros((num_strats, num_strats)) + if use_sparse: + c = sps.dok_matrix((num_strats, num_strats), dtype=float) + else: + c = np.zeros((num_strats, num_strats)) rhos = np.zeros((num_strats, num_strats)) # r and s are, respectively, the column and row strategy profiles for s in range(num_strats): # Current strategy + row_mass = 0.0 for r in range(num_strats): # Next strategy if s != r: # Compute off-diagonal fixation probabilities if use_inf_alpha: @@ -298,24 +332,37 @@ def _get_singlepop_transition_matrix(payoff_table, payoff_sr = _get_payoff( payoff_table, payoffs_are_hpt_format, strat_profile=[s, r], k=0) if np.isclose(payoff_rs, payoff_sr, atol=1e-14): - c[s, r] = eta * 0.5 + prob = eta * 0.5 elif payoff_rs > payoff_sr: # Transition to r since its payoff is higher than s, but remove some # small amount of mass, inf_alpha_eps, to keep the chain irreducible - c[s, r] = eta * (1 - inf_alpha_eps) + prob = eta * (1 - inf_alpha_eps) else: # Transition with very small probability - c[s, r] = eta * inf_alpha_eps + prob = eta * inf_alpha_eps else: rhos[s, r] = _get_rho_sr(payoff_table, payoffs_are_hpt_format, m, r, s, alpha, game_is_constant_sum, use_local_selection_model, payoff_sum) eta = 1. / (num_strats - 1) - c[s, r] = eta * rhos[s, r] + prob = eta * rhos[s, r] + + if prob: + if use_sparse: + c[s, r] = prob + else: + c[s, r] = prob + row_mass += prob # Fixation probability of competing only against one's own strategy is 1 # rhos[s,s] = 1. 
# Commented as self-fixations are not interesting (for now) - c[s, s] = 1 - sum(c[s, :]) # Diagonals + diag_val = 1 - row_mass + if use_sparse: + c[s, s] = diag_val + else: + c[s, s] = diag_val + if use_sparse: + return c.tocsr(), rhos return c, rhos @@ -324,21 +371,25 @@ def _get_multipop_transition_matrix(payoff_tables, m, alpha, use_inf_alpha=False, - inf_alpha_eps=0.1): + inf_alpha_eps=0.1, + use_sparse=False): """Gets Markov transition matrix for multipopulation games.""" num_strats_per_population = utils.get_num_strats_per_population( payoff_tables, payoffs_are_hpt_format) num_profiles = utils.get_num_profiles(num_strats_per_population) - eta = 1. / (np.sum(num_strats_per_population - 1)) - c = np.zeros((num_profiles, num_profiles)) + if use_sparse: + c = sps.dok_matrix((num_profiles, num_profiles), dtype=float) + else: + c = np.zeros((num_profiles, num_profiles)) rhos = np.zeros((num_profiles, num_profiles)) for id_row_profile in range(num_profiles): row_profile = utils.get_strat_profile_from_id(num_strats_per_population, id_row_profile) + row_mass = 0.0 next_profile_gen = utils.get_valid_next_profiles(num_strats_per_population, row_profile) @@ -358,15 +409,15 @@ def _get_multipop_transition_matrix(payoff_tables, row_profile, k=index_population_that_changed) if np.isclose(payoff_col, payoff_row, atol=1e-14): - c[id_row_profile, id_col_profile] = eta * 0.5 + prob = eta * 0.5 elif payoff_col > payoff_row: # Transition to col strategy since its payoff is higher than row # strategy, but remove some small amount of mass, inf_alpha_eps, to # keep the chain irreducible - c[id_row_profile, id_col_profile] = eta * (1 - inf_alpha_eps) + prob = eta * (1 - inf_alpha_eps) else: # Transition with very small probability - c[id_row_profile, id_col_profile] = eta * inf_alpha_eps + prob = eta * inf_alpha_eps else: rhos[id_row_profile, id_col_profile] = _get_rho_sr_multipop( payoff_table_k=payoff_tables[index_population_that_changed], @@ -376,28 +427,45 @@ def 
_get_multipop_transition_matrix(payoff_tables, r=col_profile, s=row_profile, alpha=alpha) - c[id_row_profile, - id_col_profile] = eta * rhos[id_row_profile, id_col_profile] + prob = eta * rhos[id_row_profile, id_col_profile] + + if prob: + c[id_row_profile, id_col_profile] = prob + row_mass += prob # Special case of self-transition - c[id_row_profile, id_row_profile] = 1 - sum(c[id_row_profile, :]) + diag_val = 1 - row_mass + c[id_row_profile, id_row_profile] = diag_val + if use_sparse: + return c.tocsr(), rhos return c, rhos -def _get_stationary_distr(c): +def _get_stationary_distr(c, use_sparse=False): """Gets stationary distribution of transition matrix c.""" - eigenvals, left_eigenvecs, _ = la.eig(c, left=True, right=True) + if use_sparse: + c_sparse = c if sps.isspmatrix(c) else sps.csr_matrix(c) + try: + eigenvals, eigenvecs = spla.eigs(c_sparse.T, k=1, which='LM') + except Exception: + raise Exception('Sparse eigenvalue decomposition did not find 1 stationary distribution') + left_vec = eigenvecs[:, 0] + else: + eigenvals, left_eigenvecs, _ = la.eig(c, left=True, right=True) - mask = abs(eigenvals - 1.) < 1e-10 - left_eigenvecs = left_eigenvecs[:, mask] - num_stationary_eigenvecs = np.shape(left_eigenvecs)[1] - if num_stationary_eigenvecs != 1: - raise ValueError('Expected 1 stationary distribution, but found %d' % - num_stationary_eigenvecs) - left_eigenvecs *= 1. / sum(left_eigenvecs) + mask = abs(eigenvals - 1.) 
< 1e-10 + left_eigenvecs = left_eigenvecs[:, mask] + num_stationary_eigenvecs = np.shape(left_eigenvecs)[1] + if num_stationary_eigenvecs != 1: + raise ValueError('Expected 1 stationary distribution, but found %d' % + num_stationary_eigenvecs) + left_vec = left_eigenvecs[:, 0] - return left_eigenvecs.real.flatten() + pi = left_vec.real + pi_sum = np.sum(pi) + pi /= pi_sum + return pi.flatten() def print_results(payoff_tables, @@ -420,7 +488,9 @@ def print_results(payoff_tables, print('\nFixation probability matrix (rho_{r,s}/rho_m):\n', np.around(rhos / rho_m, decimals=2)) if c is not None: - print('\nMarkov transition matrix (c):\n', np.around(c, decimals=2)) + c_to_print = c.toarray() if sps.isspmatrix(c) else c + print('\nMarkov transition matrix (c):\n', + np.around(c_to_print, decimals=2)) if pi is not None: print('\nStationary distribution (pi):\n', pi) @@ -434,7 +504,8 @@ def sweep_pi_vs_epsilon(payoff_tables, max_iters=100, min_epsilon=1e-14, num_strats_to_label=10, - legend_sort_clusters=False): + legend_sort_clusters=False, + use_sparse=False): """Computes infinite-alpha distribution for a range of perturbations. The range of response graph perturbations is defined in epsilon_list. @@ -464,6 +535,7 @@ def sweep_pi_vs_epsilon(payoff_tables, the legend according to orderings for earlier alpha values. Primarily for visualization purposes! Rankings for lower alpha values should be interpreted carefully. + use_sparse: If true, use scipy sparse arrays and solvers. Returns: pi: AlphaRank stationary distribution. 
@@ -500,7 +572,8 @@ def sweep_pi_vs_epsilon(payoff_tables, try: pi_prev = pi _, _, pi, _, _ = compute(payoff_tables, m=m, alpha=alpha, - use_inf_alpha=True, inf_alpha_eps=epsilon) + use_inf_alpha=True, inf_alpha_eps=epsilon, + use_sparse=use_sparse) epsilon_pi_hist[epsilon] = pi # Stop when pi converges if num_iters > min_iters and np.allclose(pi, pi_prev): @@ -565,7 +638,8 @@ def sweep_pi_vs_alpha(payoff_tables, rtol=1e-5, atol=1e-8, num_strats_to_label=10, - legend_sort_clusters=False): + legend_sort_clusters=False, + use_sparse=False): """Computes stationary distribution, pi, for range of selection intensities. The range of selection intensities is defined in alpha_list and corresponds @@ -588,12 +662,12 @@ def sweep_pi_vs_alpha(payoff_tables, the legend according to orderings for earlier alpha values. Primarily for visualization purposes! Rankings for lower alpha values should be interpreted carefully. + use_sparse: If true, use scipy sparse arrays and solvers. Returns: pi: AlphaRank stationary distribution. alpha: The AlphaRank selection-intensity level resulting from sweep. """ - payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables) num_populations = len(payoff_tables) num_strats_per_population = utils.get_num_strats_per_population( @@ -620,7 +694,8 @@ def sweep_pi_vs_alpha(payoff_tables, while 1: try: - _, _, pi, _, _ = compute(payoff_tables, alpha=alpha, m=m) + _, _, pi, _, _ = compute(payoff_tables, alpha=alpha, m=m, + use_sparse=use_sparse) pi_list = np.append(pi_list, np.reshape(pi, (-1, 1)), axis=1) alpha_list.append(alpha) # Stop when pi converges @@ -669,7 +744,8 @@ def compute_and_report_alpharank(payoff_tables, m=50, alpha=100, verbose=False, - num_top_strats_to_print=8): + num_top_strats_to_print=8, + use_sparse=False): """Computes and visualizes Alpha-Rank outputs. Args: @@ -680,12 +756,14 @@ def compute_and_report_alpharank(payoff_tables, alpha: Fermi distribution temperature parameter. 
verbose: Set to True to print intermediate results. num_top_strats_to_print: Number of top strategies to print. + use_sparse: If true, use scipy sparse arrays and solvers. Returns: pi: AlphaRank stationary distribution/rankings. """ payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables) - rhos, rho_m, pi, _, _ = compute(payoff_tables, m=m, alpha=alpha) + rhos, rho_m, pi, _, _ = compute(payoff_tables, m=m, alpha=alpha, + use_sparse=use_sparse) strat_labels = utils.get_strat_profile_labels(payoff_tables, payoffs_are_hpt_format) @@ -709,7 +787,8 @@ def compute(payoff_tables, use_local_selection_model=True, verbose=False, use_inf_alpha=False, - inf_alpha_eps=0.01): + inf_alpha_eps=0.01, + use_sparse=False): """Computes the finite population stationary statistics. Args: @@ -724,6 +803,8 @@ def compute(payoff_tables, verbose: Set to True to print intermediate results. use_inf_alpha: Use infinite-alpha alpharank model. inf_alpha_eps: Noise term to use in infinite-alpha alpharank model. + use_sparse: If true, store transition matrices sparsely and use sparse + eigensolvers when computing stationary distributions. Returns: rhos: Matrix of strategy-to-strategy fixation probabilities. 
@@ -752,7 +833,6 @@ def compute(payoff_tables, print('num_strats_per_population:', num_strats_per_population) if num_populations == 1: - # User fast closed-form analysis for constant-sum single-population games game_is_constant_sum, payoff_sum = utils.check_is_constant_sum( payoff_tables[0], payoffs_are_hpt_format) if verbose: @@ -768,7 +848,8 @@ def compute(payoff_tables, use_local_selection_model, payoff_sum, use_inf_alpha=use_inf_alpha, - inf_alpha_eps=inf_alpha_eps) + inf_alpha_eps=inf_alpha_eps, + use_sparse=use_sparse) num_profiles = num_strats_per_population[0] else: c, rhos = _get_multipop_transition_matrix( @@ -777,10 +858,12 @@ def compute(payoff_tables, m, alpha, use_inf_alpha=use_inf_alpha, - inf_alpha_eps=inf_alpha_eps) + inf_alpha_eps=inf_alpha_eps, + use_sparse=use_sparse) num_profiles = utils.get_num_profiles(num_strats_per_population) - - pi = _get_stationary_distr(c) + if verbose or use_sparse: + _log_matrix_sparsity(c, 'a-rank transition matrix sparsity') + pi = _get_stationary_distr(c, use_sparse=use_sparse) rho_m = 1. / m if not use_inf_alpha else 1 # Neutral fixation probability if verbose: diff --git a/open_spiel/python/egt/kosaraju.py b/open_spiel/python/egt/kosaraju.py new file mode 100644 index 0000000000..06b8b94f59 --- /dev/null +++ b/open_spiel/python/egt/kosaraju.py @@ -0,0 +1,195 @@ +import scipy.sparse as sps + + +class Graph(object): + """Implementation of Kosaraju's algorithm for computing communicating classes of a + directed graph, expressed as an adjacency matrix. Taken from + http://www.kylesauri.com/home/kosarajus-algorithm-in-python-3. + + Some methods are implemented in a non-recursive way to avoid recursion limit. + """ + + def __init__(self, mat, zeroTest): + """Takes an adjacency matrix and prepares for computing its communicating + and closed classes. 
+ """ + s = mat.shape + self.dim = s[0] + assert(self.dim==s[1] and len(s)==2) + self.zeroTest = zeroTest + self.mat = mat + self.is_sparse = sps.issparse(mat) + self.communicatingClasses = None + self.closedClasses = None + + + def Visit(self, vertex): + """Performs a depth-first search of the graph starting + from the given node/vertex to compute a list of + sortedVertices consistent with the pre-order generated by + adjacency, keeping track of progress via the list of + visited vertices. + """ + + if vertex in self.visited: + return + visited_set = set(self.visited) + + stack = [(vertex, False)] + while stack: + v, processed = stack.pop() + + if processed: + self.sortedVertices.insert(0, v) + continue + + if v in visited_set: + continue + + self.visited.append(v) + visited_set.add(v) + stack.append((v, True)) + + if self.is_sparse: + col = self.mat.getcol(v) + nbrs = col.nonzero()[0] + for neighbor in sorted(nbrs, reverse=True): + if neighbor in visited_set: + continue + val = self.mat[neighbor, v] + if not self.zeroTest(val): + stack.append((neighbor, False)) + else: + for neighbor in range(self.dim - 1, -1, -1): + if not self.zeroTest(self.mat[neighbor, v]) and neighbor not in visited_set: + stack.append((neighbor, False)) + + + def Assign(self, vertex, root): + """Associate a vertex with its communicating class representative, root, + via the components dictionary. 
+ """ + stack = [vertex] + while stack: + v = stack.pop() + if v in self.components: + continue + self.components[v] = root + if self.is_sparse: + row = self.mat.getrow(v) + nbrs = row.nonzero()[1] + for neighbor in nbrs: + if neighbor in self.components: + continue + val = self.mat[v, neighbor] + if not self.zeroTest(val): + stack.append(neighbor) + else: + for neighbor in range(self.dim): + if not self.zeroTest(self.mat[v, neighbor]) and neighbor not in self.components: + stack.append(neighbor) + + + def CommunicatingClasses(self): + """Apply Kosaraju's algorithm to compute the set of communicating classes, + espressed as a dictionary of representatives from each class with the + associate values given by the classes, as arrays. + """ + + if (self.communicatingClasses): + return self.communicatingClasses + + self.visited = [] + self.sortedVertices = [] + self.components = {} + + for vertex in range(self.dim): + self.Visit(vertex) + + for vertex in self.sortedVertices: + self.Assign(vertex, vertex) + + self.communicatingClasses = {} + for vertex in range(self.dim): + root = self.components[vertex] + if root in self.communicatingClasses: + self.communicatingClasses[root].append(vertex) + else: + self.communicatingClasses[root] = [vertex] + + return self.communicatingClasses + + + def ClosedClasses(self): + + if (self.closedClasses): + return self.closedClasses + + if(not self.communicatingClasses): + self.communicatingClasses = self.CommunicatingClasses() + + self.closedClasses = {} + for cc in self.communicatingClasses.keys(): + + # Assume that it is closed for now + self.closedClasses[cc] = self.communicatingClasses[cc] + for source in self.communicatingClasses[cc]: + if self.is_sparse: + col = self.mat.getcol(source) + nbrs = col.nonzero()[0] + outside_found = False + for target in nbrs: + if target in self.communicatingClasses[cc]: + continue + val = self.mat[target, source] + if not self.zeroTest(val): + outside_found = True + break + if outside_found: + 
self.closedClasses.pop(cc) + break + else: + for target in range(self.dim): + if target not in self.communicatingClasses[cc] and \ + not self.zeroTest(self.mat[target, source]): + self.closedClasses.pop(cc) + break + + if cc not in self.closedClasses: + break + + return self.closedClasses + +import numpy as np +if __name__ == '__main__': + m = np.array( + [2/3, 1/4, 0, 2/3, 0, + 1/3, 3/4, 0, 0, 0, + 0, 0, 3/5, 1/6, 4/7, + 0, 0, 2/5, 1/2, 0, + 0, 0, 0, 0, 3/7]).reshape(5, 5) + + + def zeroTest(x): + return x == 0 + + g = Graph(m, zeroTest) + print("np matrix:") + print(g.CommunicatingClasses()) + print(g.ClosedClasses()) + + + m_sparse = sps.csr_matrix( + np.array( + [2/3, 1/4, 0, 2/3, 0, + 1/3, 3/4, 0, 0, 0, + 0, 0, 3/5, 1/6, 4/7, + 0, 0, 2/5, 1/2, 0, + 0, 0, 0, 0, 3/7]).reshape(5, 5) + ) + + + g = Graph(m_sparse, zeroTest) + print("sparse csr matrix:") + print(g.CommunicatingClasses()) + print(g.ClosedClasses()) \ No newline at end of file diff --git a/open_spiel/python/egt/original_ssd.py b/open_spiel/python/egt/original_ssd.py new file mode 100644 index 0000000000..270065c346 --- /dev/null +++ b/open_spiel/python/egt/original_ssd.py @@ -0,0 +1,521 @@ +import numpy as np +from numpy.polynomial import Polynomial as P +from kosaraju import * +import sympy as sp +from sympy import * +from sympy.abc import t +from sympy.functions.elementary.piecewise import Undefined +from collections import deque +import pyspiel + +# =============================== +# SSD ALGORITHM from John +# =============================== + +def stableDistribution(M): + """Computes the normalized eigenvector for eigenvalue 1 for a unichain Markov matrix.""" + (eigs, vects) = np.linalg.eig(M) + idx = (eigs == eigs.max()).nonzero() + v = np.real(np.transpose(vects)[idx][0]) + return normalize(v) + +def polyPrint(arg): + """A helper function for printing polynomials.""" + return sp.Poly(arg.coef, t).as_expr() + +print_polyArray = np.frompyfunc(polyPrint, 1, 1) + +def polyEval(arg, val): + """A 
helper function for evaluating polynomials.""" + return np.polyval(arg, val) if isinstance(arg, np.poly1d) else arg + +evalPolyArray = np.frompyfunc(polyEval, 2, 1) + +def zeroTest(x): + """A helper function for testing a scalar against 0.""" + return x == 0 + +def polyZeroTest(p): + """A helper function for testing a polynomial against 0.""" + return p == np.poly1d([0]) + +def shiftExponent(p, degree): + """A helper function for lowering the degree of a polynomial.""" + coefs = list(p.c) + p_degree = len(coefs) - 1 + if (p_degree < degree): + return np.poly1d([]) + del coefs[p_degree - degree + 1 : p_degree + 1] + return np.poly1d(coefs) + +def costResistance(p): + """A helper function for computing the cost and resistance.""" + coefs = p.c + l = len(coefs) + if (l == 1 and (0 == coefs[0])): + return {'cost': 1, 'resistance': 'infinity'} + i = l - 1 + while (i > 0) and (coefs[i] == 0): + i -= 1 + return {'cost': coefs[i], 'resistance': l - i - 1} + +def hasZeroOnDiagonalP(mat, zeroTest): + """A helper function for testing whether a matrix has a zero on its diagonal.""" + result = False + for col in range(mat.shape[1]): + if (zeroTest(mat[col, col])): + result = True + return result + +def uniformScale(mat, zeroTest): + """A helper function for applying a default uniform scaling factor to a PMM, if necessary.""" + if (not hasZeroOnDiagonalP(mat, zeroTest)): + return mat + poly_one = np.poly1d([1]) + return 0.5 * (mat + poly_one*np.identity(mat.shape[0])) + +def dropNonZeroR(alg): + """A helper function for adjusting the supplied matrix.""" + return alg + +def nonUniformScale(mat): + """If possible, applies a non-uniform scaling to mat.""" + result = {} + result['dim'] = mat.shape[1] + dim = range(result['dim']) + + minResistance = -1 + maxCostSum = 0 + nonTransientCols = list(dim) + + for col in dim: + minColResistance = -1 + colCostSum = 0 + for row in dim: + if (row == col): + continue + cr = costResistance(mat[row, col]) + if (cr['resistance'] == 'infinity'): + 
continue + if (cr['resistance'] == 0): + minColResistance = 0 + break + elif (minColResistance < 0 or cr['resistance'] < minColResistance): + minColResistance = cr['resistance'] + colCostSum = cr['cost'] + elif (cr['resistance'] == minColResistance): + colCostSum += cr['cost'] + + if (minColResistance == 0): + nonTransientCols.remove(col) + elif (minColResistance > 0): + if ((minResistance < 0) or (minColResistance < minResistance)): + minResistance = minColResistance + maxCostSum = colCostSum + elif (minColResistance == minResistance): + maxCostSum += colCostSum + + if (len(nonTransientCols) == 0): + raise Exception("No scaling is possible.") + + result['mat'] = np.array(mat, copy=True) + new_mat = result['mat'] + result['D'] = np.array(np.identity(result['dim']), dtype=object) + Dmatrix = result['D'] + del result['dim'] + + f_cost = 2 * maxCostSum + tmp = [f_cost] + tmp.extend([0] * minResistance) + f = np.poly1d(tmp) + for col in dim: + if (col not in nonTransientCols): + Dmatrix[col, col] = f + + poly_one = np.poly1d([1]) + for col in dim: + if (col in nonTransientCols): + for row in dim: + if (row == col): + new_mat[row, col] = poly_one + (poly_one/f_cost) * shiftExponent(mat[row, col] - poly_one, minResistance) + else: + new_mat[row, col] = (poly_one/f_cost) * shiftExponent(mat[row, col], minResistance) + return result + +def reduce(mat, s, M0 = None): + """Eliminates all the states of mat in s.""" + result = {} + if (len(s) == 0): + raise Exception("s cannot be empty.") + + dim = mat.shape[1] + if M0 is None: + M0 = np.array(evalPolyArray(mat,0), dtype=float) + + sbar = list(set(range(dim)).symmetric_difference(set(s))) + perm = list(sbar) + perm.extend(s) + P = 0*np.identity(dim) + for col in range(dim): + P[perm[col], col] = 1 + + Mssbar = mat[np.ix_(s, sbar)] + LambdassInv = np.linalg.inv(M0[np.ix_(s, s)] - np.identity(len(s))) + result['i'] = P@np.block([[np.identity(len(sbar))], [-LambdassInv@Mssbar]]) + tmp = mat[np.ix_(sbar, sbar)] - mat[np.ix_(sbar, 
s)]@LambdassInv@Mssbar + + dim = tmp.shape[1] + zero = np.poly1d([0]) + one = np.poly1d([1]) + for col in range(dim): + sum = zero + for row in range(dim): + if (row == col): + continue + cr = costResistance(tmp[row, col]) + if cr['resistance'] == 'infinity': + tmp[row, col] = zero + else: + tmp[row, col] = [0] * (cr['resistance'] + 1) + tmp[row, col][0] = cr['cost'] + tmp[row, col] = np.poly1d(tmp[row, col]) + sum = sum + tmp[row, col] + tmp[col, col] = one - sum + + result['mat'] = tmp + return result + +def normalize(v): + """Divides a vector by its sum, so it sums to 1.""" + return v/np.sum(v) + +def SSD_step(mat): + """Performs the next possible reduction step in the SSD algorithm.""" + result = {} + M0 = np.array(evalPolyArray(mat,0), dtype=float) + G = Graph(M0, zeroTest) + communicating = G.CommunicatingClasses() + + if (len(communicating) == 1): + result['stab'] = stableDistribution(M0) + return result + + closed = G.ClosedClasses() + maxSize = 0 + maxClass = None + numNonTrivial = 0 + for x in closed: + classLen = len(closed[x]) + if classLen > 1: + numNonTrivial = numNonTrivial + 1 + if classLen > maxSize: + maxSize = classLen + maxClass = closed[x] + + s = list(maxClass) + s.pop(0) + if (numNonTrivial > 0): + return reduce(mat, s, M0) + + return nonUniformScale(mat) + +def SSD_iterate(mat): + """Recursively apply SSD_step to compute the stable distribution.""" + result = SSD_step(mat) + + if ('stab' in result): + return result['stab'] + + if ('i' in result): + return result['i']@SSD_iterate(result['mat']) + + return result['D']@SSD_iterate(result['mat']) + +def SSD(mat): + """Computes the SSD of a PMM.""" + g = Graph(mat, polyZeroTest) + if len(g.CommunicatingClasses().keys()) > 1: + raise Exception("Input must be unichain.") + + stab = SSD_iterate(mat) + return normalize(np.array(evalPolyArray(stab,0), dtype=float)) + +# ======================================== +# ADAPTIVE LEARNING SIMULATION +# ======================================== + +def 
get_payoff_matrices(game): + """Extract payoff matrices from PySpiel game.""" + n_actions = game.num_distinct_actions() + num_players = game.num_players() + + game1 = np.zeros((n_actions, n_actions)) + game2 = np.zeros((n_actions, n_actions)) + + state = game.new_initial_state() + + if state.is_simultaneous_node(): + for action1 in range(n_actions): + for action2 in range(n_actions): + temp_state = game.new_initial_state() + temp_state.apply_actions([action1, action2]) + + if temp_state.is_terminal(): + returns = temp_state.returns() + game1[action1, action2] = returns[0] + if num_players > 1: + game2[action2, action1] = returns[1] + + return game1, game2 + +def joint_state_to_index(history1, history2, n_actions, m): + """Convert joint histories to state index.""" + if len(history1) < m or len(history2) < m: + return 0 + + index1 = 0 + for i, action in enumerate(history1[-m:]): + index1 += action * (n_actions ** (m - 1 - i)) + + index2 = 0 + for i, action in enumerate(history2[-m:]): + index2 += action * (n_actions ** (m - 1 - i)) + + return index1 + index2 * (n_actions ** m) + +def index_to_joint_state(index, n_actions, m): + """Convert state index back to joint histories.""" + n_states_per_player = n_actions ** m + + index2 = index // n_states_per_player + index1 = index % n_states_per_player + + history1 = [] + temp_index1 = index1 + for i in range(m): + history1.append(temp_index1 % n_actions) + temp_index1 //= n_actions + history1 = list(reversed(history1)) + + history2 = [] + temp_index2 = index2 + for i in range(m): + history2.append(temp_index2 % n_actions) + temp_index2 //= n_actions + history2 = list(reversed(history2)) + + return history1, history2 + +def compute_best_response_probs(game_matrix, opponent_mixed_strategy): + """Compute probability distribution over best responses.""" + expected_payoffs = game_matrix @ opponent_mixed_strategy + max_payoff = np.max(expected_payoffs) + best_actions = np.where(expected_payoffs == max_payoff)[0] + + 
action_probs = np.zeros(len(expected_payoffs)) + action_probs[best_actions] = 1.0 / len(best_actions) + + return action_probs + +def compute_best_response(game_matrix, opponent_mixed_strategy, epsilon=0.0): + """Compute best response to opponent's mixed strategy.""" + expected_payoffs = game_matrix @ opponent_mixed_strategy + max_payoff = np.max(expected_payoffs) + best_actions = np.where(expected_payoffs == max_payoff)[0] + + action_probs = np.zeros(len(expected_payoffs)) + + if epsilon == 0: + action_probs[best_actions] = 1.0 / len(best_actions) + else: + action_probs += epsilon / len(action_probs) + action_probs[best_actions] += (1 - epsilon) / len(best_actions) + + action = np.random.choice(len(action_probs), p=action_probs) + return action, action_probs + +def construct_perturbed_markov_matrix(game, m, s): + """Construct the perturbed Markov matrix for adaptive learning dynamics.""" + game1, game2 = get_payoff_matrices(game) + n_actions = game.num_distinct_actions() + n_states = (n_actions ** m) ** 2 + + # Create symbolic variable for epsilon + x = np.poly1d([1, 0]) # epsilon + p_one = np.poly1d([1]) # 1 + p_zero = np.poly1d([0]) # 0 + + M = np.zeros((n_states, n_states), dtype=object) + + for state_idx in range(n_states): + hist_p0_sees_p1, hist_p1_sees_p0 = index_to_joint_state(state_idx, n_actions, m) + + for next_state_idx in range(n_states): + hist_p0_sees_p1_next, hist_p1_sees_p0_next = index_to_joint_state(next_state_idx, n_actions, m) + + transition_prob = p_zero + + for action_p0 in range(n_actions): + for action_p1 in range(n_actions): + expected_hist_p0_sees_p1 = hist_p0_sees_p1[1:] + [action_p1] if m > 1 else [action_p1] + expected_hist_p1_sees_p0 = hist_p1_sees_p0[1:] + [action_p0] if m > 1 else [action_p0] + + if (expected_hist_p0_sees_p1 == hist_p0_sees_p1_next and + expected_hist_p1_sees_p0 == hist_p1_sees_p0_next): + + if m == 0: + action_prob = (p_one / n_actions) * (p_one / n_actions) + else: + # Player 0's empirical distribution of Player 
1's actions + pi_p1 = np.zeros(n_actions) + for a in hist_p0_sees_p1: + pi_p1[a] += 1.0 / len(hist_p0_sees_p1) + + # Player 1's empirical distribution of Player 0's actions + pi_p0 = np.zeros(n_actions) + for a in hist_p1_sees_p0: + pi_p0[a] += 1.0 / len(hist_p1_sees_p0) + + # Best response probabilities + p0_best_responses = compute_best_response_probs(game1, pi_p1) + p1_best_responses = compute_best_response_probs(game2, pi_p0) + + # Action probabilities with experimentation + p0_action_prob = (p_one - x) * p0_best_responses[action_p0] + x / n_actions + p1_action_prob = (p_one - x) * p1_best_responses[action_p1] + x / n_actions + + action_prob = p0_action_prob * p1_action_prob + + transition_prob = transition_prob + action_prob + + M[next_state_idx, state_idx] = transition_prob + + return M + +def adaptive_learning_stationary(game, m, s, T=100000, epsilon=0.1): + """Simulate adaptive learning and return the stationary distribution.""" + game1, game2 = get_payoff_matrices(game) + n_actions = game.num_distinct_actions() + n_joint_states = (n_actions ** m) ** 2 + + state_visits = np.zeros(n_joint_states) + + history_p0_sees_p1 = deque(maxlen=m) + history_p1_sees_p0 = deque(maxlen=m) + + for i in range(T + m): + if len(history_p0_sees_p1) == 0 or len(history_p1_sees_p0) == 0: + action_p0 = np.random.randint(0, n_actions) + action_p1 = np.random.randint(0, n_actions) + else: + sample_size_p0 = min(s, len(history_p0_sees_p1)) + sample_size_p1 = min(s, len(history_p1_sees_p0)) + + p0_sampled_actions = np.random.choice(list(history_p0_sees_p1), sample_size_p0, replace=False) + p1_sampled_actions = np.random.choice(list(history_p1_sees_p0), sample_size_p1, replace=False) + + pi_p1 = np.histogram(p0_sampled_actions, bins=range(n_actions + 1))[0] / sample_size_p0 + pi_p0 = np.histogram(p1_sampled_actions, bins=range(n_actions + 1))[0] / sample_size_p1 + + action_p0, _ = compute_best_response(game1, pi_p1, epsilon) + action_p1, _ = compute_best_response(game2, pi_p0, 
epsilon) + + history_p0_sees_p1.append(action_p1) + history_p1_sees_p0.append(action_p0) + + if i >= m and len(history_p0_sees_p1) == m and len(history_p1_sees_p0) == m: + joint_state_idx = joint_state_to_index(list(history_p0_sees_p1), list(history_p1_sees_p0), n_actions, m) + state_visits[joint_state_idx] += 1 + + return state_visits / np.sum(state_visits) + + +def compare_ssd_and_simulation(game, m, s, T=10000, epsilon=0.01): + """Compare SSD analytical result with simulation. + + What fraction of the time does SSD say we'll be playing a strategy + versus what fraction do we actually play that strategy + """ + print(f"Comparing SSD vs Simulation for m={m}, s={s}, epsilon={epsilon}") + + # Get SSD result (analytical) + print("Computing SSD (analytical)...") + try: + M = construct_perturbed_markov_matrix(game, m, s) + ssd_result = SSD(M) + except Exception as e: + print(f"SSD failed: {e}") + return None + + # Get simulation result (empirical) + print("Running simulation...") + sim_result = adaptive_learning_stationary(game, m, s, T, epsilon) + + # Compare results + n_actions = game.num_distinct_actions() + n_joint_states = (n_actions ** m) ** 2 + + print(f"\nResults comparison:") + print(f"Number of joint states: {n_joint_states}") + print(f"SSD result shape: {ssd_result.shape}") + print(f"Simulation result shape: {sim_result.shape}") + + # Print state-by-state comparison + print(f"\nState-by-state comparison:") + print(f"{'State':<6} {'P0_hist':<10} {'P1_hist':<10} {'SSD':<10} {'Simulation':<10} {'Diff':<10}") + print("-" * 70) + + max_diff = 0 + for state_idx in range(n_joint_states): + hist1, hist2 = index_to_joint_state(state_idx, n_actions, m) + ssd_prob = ssd_result[state_idx] + sim_prob = sim_result[state_idx] + diff = abs(ssd_prob - sim_prob) + max_diff = max(max_diff, diff) + + if ssd_prob > 0.01 or sim_prob > 0.01: # Only show significant states + print(f"{state_idx:<6} {str(hist1):<10} {str(hist2):<10} {ssd_prob:<10.4f} {sim_prob:<10.4f} {diff:<10.4f}") 
+ + # Summary statistics + mse = np.mean((ssd_result - sim_result) ** 2) + mae = np.mean(np.abs(ssd_result - sim_result)) + + print(f"\nSummary:") + print(f"Maximum absolute difference: {max_diff:.6f}") + print(f"Mean absolute error (MAE): {mae:.6f}") + print(f"Mean squared error (MSE): {mse:.6f}") + print(f"Correlation: {np.corrcoef(ssd_result, sim_result)[0,1]:.6f}") + + return { + 'ssd_result': ssd_result, + 'sim_result': sim_result, + 'max_diff': max_diff, + 'mae': mae, + 'mse': mse, + 'correlation': np.corrcoef(ssd_result, sim_result)[0,1] + } + +def main(): + """Main function to run the comparison.""" + # Load matching pennies game + # game = pyspiel.load_game('matrix_mp') + game = pyspiel.load_game('matrix_bos') + + # Test with different parameters + test_cases = [ + (1, 1), # Small case + (2, 1), # Medium case + (2, 2), # Larger case + # (4, 2) + ] + + for m, s in test_cases: + print(f"\n{'='*60}") + print(f"Testing m={m}, s={s}") + print('='*60) + + results = compare_ssd_and_simulation(game, m, s, T=100000, epsilon=0.001) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/open_spiel/python/egt/ssd.py b/open_spiel/python/egt/ssd.py new file mode 100644 index 0000000000..6a302dcd5d --- /dev/null +++ b/open_spiel/python/egt/ssd.py @@ -0,0 +1,796 @@ +"""Implementation of Stochastically Stable Distribution (SSD) analysis. + +This module implements the SSD algorithm for computing stochastically stable +distributions of perturbed Markov processes, with applications to evolutionary +game theory and multiagent learning. + +Based on: +"An Algorithm for Computing Stochastically Stable Distributions with +Applications to Multiagent Learning in Repeated Games" +by John R. Wicks and Amy Greenwald. 
+https://arxiv.org/pdf/1207.1424 +""" + +from numbers import Number +from collections import defaultdict +from typing import Any, Dict, List, Optional, Tuple, Union +import warnings + +import numpy as np +from numpy.polynomial import Polynomial as P +import scipy.sparse as sp +import scipy.sparse.linalg as spla + +from open_spiel.python.egt import utils +from open_spiel.python.egt.kosaraju import Graph +from open_spiel.python.egt import alpharank + +def _convert_alpharank_to_ssd_format( + payoff_tables: List[Any], + payoffs_are_hpt_format: bool) -> Dict[str, Any]: + """Converts Alpha-Rank payoff tables to SSD format.""" + num_strats = utils.get_num_strats_per_population(payoff_tables, + payoffs_are_hpt_format) + if payoffs_are_hpt_format: + num_profiles = len(payoff_tables[0]) + elif len(payoff_tables) == 1: + num_profiles = payoff_tables[0].shape[0] + else: + num_profiles = utils.get_num_profiles( + [table.shape[0] for table in payoff_tables]) + return { + "payoff_tables": payoff_tables, + "payoffs_are_hpt_format": payoffs_are_hpt_format, + "num_populations": len(payoff_tables), + "num_strats_per_population": num_strats, + "num_profiles": num_profiles, + } + +def _validate_ssd_distribution(distribution: np.ndarray, + tolerance: float = 1e-6) -> bool: + return (np.all(distribution >= -tolerance) and + abs(np.sum(distribution) - 1.0) <= tolerance) + +def normalize(vector: Union[np.ndarray, sp.spmatrix]) -> np.ndarray: + """Normalize a vector so that it sums to 1.""" + if sp.issparse(vector): + arr = np.array(vector.toarray(), dtype=float).flatten() + else: + arr = np.array(vector, dtype=float).flatten() + total = np.sum(arr) + if total == 0: + return arr + return arr / total + + +def _select_stationary_vector(eigenvalues, eigenvectors): + """Select eigenvector associated with the largest real eigenvalue.""" + eigenvalues = np.asarray(eigenvalues) + idx = np.argmax(np.real(eigenvalues)) + vec = np.real(eigenvectors[:, idx]) + return vec + + +# TODO: check if this 
works
+def _stable_distribution(matrix, tol: float = 1e-9):
+  """Computes the normalized eigenvector for eigenvalue 1 for a unichain Markov matrix."""
+  is_sparse = sp.issparse(matrix)
+  if is_sparse:
+    row_sums = np.asarray(matrix.sum(axis=1)).reshape(-1)
+    col_sums = np.asarray(matrix.sum(axis=0)).reshape(-1)
+    target_dense = None
+  else:
+    target_dense = np.asarray(matrix, dtype=float)
+    row_sums = target_dense.sum(axis=1)
+    col_sums = target_dense.sum(axis=0)
+
+  rows_stochastic = np.allclose(row_sums, 1.0, atol=tol)
+  cols_stochastic = np.allclose(col_sums, 1.0, atol=tol)
+
+  # Column-stochastic matrices carry the stationary vector as a right
+  # eigenvector already; row-stochastic ones need the transpose.
+  use_transpose = True
+  if cols_stochastic:
+    use_transpose = False
+  elif rows_stochastic:
+    use_transpose = True
+
+  if is_sparse:
+    target_matrix = matrix.T if use_transpose else matrix
+    if target_matrix.shape[0] <= 2:
+      # ARPACK (spla.eigs) requires k < n - 1, so it cannot handle the
+      # 1x1/2x2 matrices produced by two-strategy games; fall back to a
+      # dense eigendecomposition for these tiny inputs.
+      eig_vals, eig_vecs = np.linalg.eig(target_matrix.toarray())
+    else:
+      eig_vals, eig_vecs = spla.eigs(target_matrix, k=1, which="LM")
+  else:
+    if use_transpose:
+      target_matrix = target_dense.T
+    else:
+      target_matrix = target_dense
+    eig_vals, eig_vecs = np.linalg.eig(target_matrix)
+
+  vec = _select_stationary_vector(eig_vals, eig_vecs)
+  return normalize(vec)
+
+
+def _poly_eval(arg, val):
+  return np.polyval(arg, val) if isinstance(arg, np.poly1d) else arg
+
+_eval_poly_array = np.frompyfunc(_poly_eval, 2, 1)
+
+def _zero_test(value):
+  return value == 0
+
+def _poly_zero_test(poly):
+  """Returns true if the polynomial is 0."""
+  if isinstance(poly, np.poly1d):
+    return poly == np.poly1d([0])
+  return poly == 0
+
+
+_POLY_ZERO = np.poly1d([0])
+_POLY_ONE = np.poly1d([1])
+
+
+def _ensure_poly1d(value: Any) -> np.poly1d:
+  """Return a numpy.poly1d representation for the provided value."""
+  if isinstance(value, np.poly1d):
+    return value
+  if isinstance(value, P):
+    return np.poly1d(value.coef)
+  if isinstance(value, np.ndarray):
+    if value.size == 1:
+      return _ensure_poly1d(value.item())
+  if isinstance(value, Number):
+    return np.poly1d([float(value)])
+  if value is None:
+    return _POLY_ZERO
+  return np.poly1d(value)
+
+
+def 
_evaluate_sparse_constant(matrix: Any, + eps_value: float = 0.0) -> sp.csr_matrix: + """Evaluate a (possibly SparsePolyMatrix) sparse polynomial matrix at epsilon.""" + if _is_sparse_poly_matrix(matrix): + rows = [] + cols = [] + data = [] + for (row, col), entry in matrix.items(): + value = float(_ensure_poly1d(entry)(eps_value)) + if value == 0: + continue + rows.append(row) + cols.append(col) + data.append(value) + return sp.csr_matrix((data, (rows, cols)), shape=matrix.shape) + + if not matrix.nnz: # not custom class + return sp.csr_matrix(matrix.shape, dtype=float) + coo = matrix.tocoo() + data = np.empty_like(coo.data, dtype=float) + for idx, entry in enumerate(coo.data): + poly = _ensure_poly1d(entry) + data[idx] = float(poly(eps_value)) + return sp.csr_matrix((data, (coo.row, coo.col)), shape=matrix.shape) + +def _set_sparse_value(matrix: Any, + row: int, + col: int, + value: Any, + tol: float = 1e-16): + """Assign or remove (when value=0) to row, col in a sparse matrix.""" + if _is_sparse_poly_matrix(matrix): + matrix.set(row, col, value, tol) + return + if isinstance(matrix, sp.dok_matrix): + if _is_zero_poly_entry(value, tol=tol): + if (row, col) in matrix: + del matrix[row, col] + else: + matrix[row, col] = value + return + raise TypeError("Unsupported sparse structure in _set_sparse_value") + + +class SparsePolyMatrix: + """dictionary based sparse matrix to store polynomial entries.""" + + __slots__ = ("shape", "_data") + + def __init__(self, shape: Tuple[int, int], + data: Optional[Dict[Tuple[int, int], Any]] = None): + self.shape = shape + self._data = {} if data is None else dict(data) + + def copy(self): + return SparsePolyMatrix(self.shape, self._data) + + def get(self, key: Tuple[int, int], default: Any = _POLY_ZERO) -> Any: + return self._data.get(key, default) + + def set(self, row: int, col: int, value: Any, tol: float = 1e-16): + if _is_zero_poly_entry(value, tol=tol): + self._data.pop((row, col), None) + else: + self._data[(row, col)] = 
value
+
+  def items(self):
+    return self._data.items()
+
+  def values(self):
+    return self._data.values()
+
+  def nnz(self) -> int:
+    return len(self._data)
+
+  def __getitem__(self, key: Tuple[int, int]) -> Any:
+    # `isinstance` rejects subscripted generics such as Tuple[int, int]
+    # (it raises TypeError at runtime), so test for a plain 2-tuple of
+    # integer indices instead.  np.ix_ output (a tuple of arrays) is not
+    # supported and falls through to the explicit raise below.
+    if isinstance(key, tuple) and len(key) == 2:
+      row, col = key
+      if isinstance(row, (int, np.integer)) and isinstance(col, (int, np.integer)):
+        return self.get((int(row), int(col)), _POLY_ZERO)
+    raise TypeError("SparsePolyMatrix can get 2d index or np.ix_ output only")
+
+  def __setitem__(self, key: Tuple[int, int], value: Any):
+    if isinstance(key, tuple) and len(key) == 2:
+      self.set(key[0], key[1], value)
+      return
+    raise TypeError("SparsePolyMatrix can set 2d index only")
+
+def _is_sparse_poly_matrix(matrix: Any) -> bool:
+  return isinstance(matrix, SparsePolyMatrix)
+
+def _sparse_poly_indicator(matrix: SparsePolyMatrix) -> sp.csr_matrix:
+  """Creates a sparse adjacency matrix."""
+  rows = []
+  cols = []
+  for (row, col), value in matrix.items():
+    if not _poly_zero_test(value):
+      rows.append(row)
+      cols.append(col)
+  data = np.ones(len(rows), dtype=float)
+  return sp.csr_matrix((data, (rows, cols)), shape=matrix.shape)
+
+
+def _make_linear_poly(a0: float, a1: float, tol: float = 1e-14):
+  """Returns either the linear polynomial a0 + a1 * eps or just a0 if a1 approx 0."""
+  if abs(a1) <= tol:
+    return float(a0)
+  return np.poly1d([a1, a0])
+
+
+def _shift_exponent(poly, degree):
+  """Lowers the degree of poly."""
+  coefs = list(poly.c)
+  poly_degree = len(coefs) - 1
+  if poly_degree < degree:
+    return np.poly1d([])
+  del coefs[poly_degree - degree + 1: poly_degree + 1]
+  return np.poly1d(coefs)
+
+
+def _cost_resistance(poly):
+  """Return the cost and resistance of poly."""
+  poly = _ensure_poly1d(poly)
+  coefs = poly.c
+  if len(coefs) == 1 and coefs[0] == 0:
+    return {"cost": 1, "resistance": "infinity"}
+  i = len(coefs) - 1
+  while i > 0 and coefs[i] == 0:
+    i -= 1
+  return {"cost": coefs[i], "resistance": len(coefs) - i - 1}
+
+
+def _non_uniform_scale(matrix):
+  if 
_is_sparse_poly_matrix(matrix) or sp.issparse(matrix): + return _non_uniform_scale_sparse(matrix) + return _non_uniform_scale_dense(matrix) + + +def _non_uniform_scale_dense(matrix): + result = {"dim": matrix.shape[1]} + dim_range = range(result["dim"]) + min_resistance = -1 + max_cost_sum = 0 + non_transient_cols = list(dim_range) + + for col in dim_range: + min_col_resistance = -1 + col_cost_sum = 0 + for row in dim_range: + if row == col: + continue + cr = _cost_resistance(matrix[row, col]) + if cr["resistance"] == "infinity": + continue + if cr["resistance"] == 0: + min_col_resistance = 0 + break + if min_col_resistance < 0 or cr["resistance"] < min_col_resistance: + min_col_resistance = cr["resistance"] + col_cost_sum = cr["cost"] + elif cr["resistance"] == min_col_resistance: + col_cost_sum += cr["cost"] + + if min_col_resistance == 0: + non_transient_cols.remove(col) + elif min_col_resistance > 0: + if min_resistance < 0 or min_col_resistance < min_resistance: + min_resistance = min_col_resistance + max_cost_sum = col_cost_sum + elif min_col_resistance == min_resistance: + max_cost_sum += col_cost_sum + + if not non_transient_cols: + raise ValueError("No scaling is possible.") + + result["mat"] = np.array(matrix, copy=True) + new_mat = result["mat"] + result["D"] = np.array(np.identity(result["dim"]), dtype=object) + d_matrix = result["D"] + del result["dim"] + + f_cost = 2 * max_cost_sum + tmp = [f_cost] + tmp.extend([0] * min_resistance) + scaling_poly = np.poly1d(tmp) + for col in dim_range: + if col not in non_transient_cols: + d_matrix[col, col] = scaling_poly + + poly_one = np.poly1d([1]) + for col in dim_range: + if col in non_transient_cols: + for row in dim_range: + if row == col: + new_mat[row, col] = poly_one + (poly_one / f_cost) * _shift_exponent( + matrix[row, col] - poly_one, min_resistance) + else: + new_mat[row, col] = (poly_one / f_cost) * _shift_exponent( + matrix[row, col], min_resistance) + return result + + +def 
_non_uniform_scale_sparse(matrix): + source = matrix + dim = source.shape[0] + column_entries = defaultdict(list) + for (row, col), value in source.items(): + if row == col: + continue + column_entries[col].append((row, value)) + + min_resistance = -1 + max_cost_sum = 0 + non_transient_cols = set(range(dim)) + + for col in range(dim): + min_col_resistance = -1 + col_cost_sum = 0 + for row, value in column_entries.get(col, []): + cr = _cost_resistance(value) + if cr["resistance"] == "infinity": + continue + if cr["resistance"] == 0: + min_col_resistance = 0 + break + if min_col_resistance < 0 or cr["resistance"] < min_col_resistance: + min_col_resistance = cr["resistance"] + col_cost_sum = cr["cost"] + elif cr["resistance"] == min_col_resistance: + col_cost_sum += cr["cost"] + + if min_col_resistance == 0: + non_transient_cols.discard(col) + elif min_col_resistance > 0: + if min_resistance < 0 or min_col_resistance < min_resistance: + min_resistance = min_col_resistance + max_cost_sum = col_cost_sum + elif min_col_resistance == min_resistance: + max_cost_sum += col_cost_sum + + if not non_transient_cols: + raise ValueError("No scaling is possible.") + + new_mat = source.copy() + identity = np.array(np.identity(dim), dtype=object) + f_cost = 2 * max_cost_sum + tmp = [f_cost] + tmp.extend([0] * min_resistance) + scaling_poly = np.poly1d(tmp) + + for col in range(dim): + if col not in non_transient_cols: + identity[col, col] = scaling_poly + continue + diag_orig = _ensure_poly1d(source.get((col, col), _POLY_ZERO)) + diag_new = _POLY_ONE + (_POLY_ONE / f_cost) * _shift_exponent( + diag_orig - _POLY_ONE, min_resistance) + _set_sparse_value(new_mat, col, col, diag_new) + for row, value in column_entries.get(col, []): + updated = (_POLY_ONE / f_cost) * _shift_exponent( + _ensure_poly1d(value), min_resistance) + _set_sparse_value(new_mat, row, col, updated) + + return { + "mat": new_mat, + "D": identity + } + + +def _reduce(matrix, states_to_remove, M0=None): + """Eliminate 
selected states from matrix.""" + if _is_sparse_poly_matrix(matrix) or sp.issparse(matrix): + return _reduce_sparse(matrix, states_to_remove, M0) + return _reduce_dense(matrix, states_to_remove, M0) + + +def _reduce_dense(matrix, states_to_remove, M0=None): + if not states_to_remove: + raise ValueError("states_to_remove cannot be empty.") + + dim = matrix.shape[1] + if M0 is None: + M0 = np.array(_eval_poly_array(matrix, 0), dtype=float) + + complement = list(set(range(dim)).symmetric_difference(set(states_to_remove))) + perm = list(complement) + perm.extend(states_to_remove) + P_matrix = np.zeros((dim, dim)) + for col in range(dim): + P_matrix[perm[col], col] = 1 + + Mssbar = matrix[np.ix_(states_to_remove, complement)] + lambdass_inv = np.linalg.inv(M0[np.ix_(states_to_remove, states_to_remove)] - + np.identity(len(states_to_remove))) + inclusion = P_matrix @ np.block([[np.identity(len(complement))], + [-lambdass_inv @ Mssbar]]) + tmp = matrix[np.ix_(complement, complement)] - matrix[np.ix_( + complement, states_to_remove)] @ lambdass_inv @ Mssbar + + dim_tmp = tmp.shape[1] + zero = np.poly1d([0]) + one = np.poly1d([1]) + for col in range(dim_tmp): + col_sum = zero + for row in range(dim_tmp): + if row == col: + continue + cr = _cost_resistance(tmp[row, col]) + if cr["resistance"] == "infinity": + tmp[row, col] = zero + else: + coeffs = [0] * (cr["resistance"] + 1) + coeffs[0] = cr["cost"] + tmp[row, col] = np.poly1d(coeffs) + col_sum = col_sum + tmp[row, col] + tmp[col, col] = one - col_sum + + return {"i": inclusion, "mat": tmp} + +def _reduce_sparse(matrix, states_to_remove, M0=None): + if not states_to_remove: + raise ValueError("states_to_remove cannot be empty.") + + dim = matrix.shape[1] + if M0 is None: + M0 = _evaluate_sparse_constant(matrix, 0) + + complement = list(set(range(dim)).symmetric_difference(set(states_to_remove))) + perm = list(complement) + perm.extend(states_to_remove) + P_matrix = sp.dok_matrix((dim, dim), dtype=float) + for col in 
range(dim):
+    P_matrix[perm[col], col] = 1
+
+  Mssbar = matrix[np.ix_(states_to_remove, complement)]
+  lambdass_inv = spla.inv(M0[np.ix_(states_to_remove, states_to_remove)] -
+                          sp.identity(len(states_to_remove)))
+  inclusion = P_matrix @ sp.block_array([[sp.identity(len(complement))],
+                                         [-lambdass_inv @ Mssbar]])
+  tmp = matrix[np.ix_(complement, complement)] - matrix[np.ix_(
+      complement, states_to_remove)] @ lambdass_inv @ Mssbar
+
+  dim_tmp = tmp.shape[1]
+  zero = np.poly1d([0])
+  one = np.poly1d([1])
+  for col in range(dim_tmp):
+    col_sum = zero
+    for row in range(dim_tmp):
+      if row == col:
+        continue
+      cr = _cost_resistance(tmp[row, col])
+      if cr["resistance"] == "infinity":
+        tmp[row, col] = zero
+      else:
+        coeffs = [0] * (cr["resistance"] + 1)
+        coeffs[0] = cr["cost"]
+        tmp[row, col] = np.poly1d(coeffs)
+      col_sum = col_sum + tmp[row, col]
+    tmp[col, col] = one - col_sum
+
+  return {"i": inclusion, "mat": tmp}
+
+
+
+def _ssd_step(matrix):
+  """Performs the next possible reduction step in the SSD algorithm."""
+  result = {}
+  if _is_sparse_poly_matrix(matrix) or sp.issparse(matrix):
+    M0 = _evaluate_sparse_constant(matrix)
+  else:
+    M0 = np.array(_eval_poly_array(matrix, 0), dtype=float)
+
+  graph = Graph(M0, _zero_test)
+  communicating = graph.CommunicatingClasses()
+
+  if len(communicating) == 1:
+    result["stab"] = _stable_distribution(M0)
+    return result
+
+  closed = graph.ClosedClasses()
+  max_size = 0
+  max_class = None
+  num_non_trivial = 0
+  for cls in closed.values():
+    class_len = len(cls)
+    if class_len > 1:
+      num_non_trivial += 1
+      if class_len > max_size:
+        max_size = class_len
+        max_class = cls
+
+  if num_non_trivial > 0:
+    # `max_class` is only assigned when a non-trivial closed class exists;
+    # building the state list unconditionally would crash on list(None)
+    # whenever every closed class is a singleton.
+    states = list(max_class)
+    states.pop(0)
+    return _reduce(matrix, states, M0)
+
+  return _non_uniform_scale(matrix)
+
+
+def _ssd_iterate(matrix):
+  """Recursively apply SSD_step until convergence."""
+  result = _ssd_step(matrix)
+  if "stab" in result:
+    return result["stab"]
+  if "i" in result:
+    return result["i"] @ 
_ssd_iterate(result["mat"]) + return result["D"] @ _ssd_iterate(result["mat"]) + + +def _compute_ssd(matrix): + """Computes the SSD of a PMM.""" + if _is_sparse_poly_matrix(matrix): + graph = Graph(_sparse_poly_indicator(matrix), _zero_test) + else: + graph = Graph(matrix, _poly_zero_test) + if len(graph.CommunicatingClasses().keys()) > 1: + raise ValueError("Input must be unichain.") + + stab = _ssd_iterate(matrix) + + if sp.issparse(stab): + vec = np.array(stab.toarray(), dtype=float).flatten() + else: + vec = np.array(_eval_poly_array(stab, 0), dtype=float) + return normalize(vec) + + + +def _is_zero_poly_entry(value, + tol: float = 1e-12, + ignore_infinitesimal: bool = False) -> bool: + """Check whether a polynomial matrix entry is approx zero.""" + if value is None: + return True + if isinstance(value, np.poly1d): + if ignore_infinitesimal: + constant_term = float(value(0.0)) + return abs(constant_term) <= tol + coeffs = np.asarray(value.coeffs, dtype=float) + return np.all(np.abs(coeffs) <= tol) + if isinstance(value, Number): + return abs(value) <= tol + try: + coeffs = np.asarray(value, dtype=float) + if coeffs.size == 0: + return True + return np.all(np.abs(coeffs) <= tol) + except Exception: + return False + + +def _matrix_nnz_and_total(matrix: np.ndarray, + tol: float = 1e-12, + ignore_infinitesimal: bool = False) -> tuple[int, int]: + """Returns non-zero count and total entries for a polynomial matrix.""" + if matrix is None: + return 0, 0 + if _is_sparse_poly_matrix(matrix): + shape = matrix.shape + if ignore_infinitesimal: + nnz = 0 + for value in matrix.values(): + if abs(_ensure_poly1d(value)(0.0)) > tol: + nnz += 1 + else: + nnz = matrix.nnz() + return nnz, int(shape[0] * shape[1]) + if sp.issparse(matrix): + shape = matrix.shape + return int(matrix.getnnz()), int(shape[0] * shape[1]) + + shape = getattr(matrix, "shape", None) + if shape is None or len(shape) != 2: + return 0, 0 + total = int(shape[0] * shape[1]) + + nnz = 0 + for i, j in 
np.ndindex(shape): + entry = matrix[i, j] + if not _is_zero_poly_entry(entry, tol=tol, + ignore_infinitesimal=ignore_infinitesimal): + nnz += 1 + return nnz, total + + +# TODO: for code review: Go over this and check. +def _construct_polynomial_transition_matrix( + base: Union[np.ndarray, sp.spmatrix], + perturbation_strength: float, + use_sparse: bool) -> Union[np.ndarray, sp.spmatrix]: + """Create polynomial transition matrix from a base. + Here, we use Alpha-Rank's evolutionary dynamics with + eps = 0 as the base matrix. Then, we add epsilon perturbations + according to the SSD algorithm and with poly types. + """ + size = base.shape[0] + if use_sparse: + base_sparse = base if sp.issparse(base) else sp.csc_matrix(base) + matrix = SparsePolyMatrix((size, size)) + for j in range(size): + noise_target = (j + 1) % size if size > 1 else j + col_sum = _POLY_ZERO + processed_rows = set() + col = base_sparse.getcol(j) + for idx, row in enumerate(col.indices): + if row == j: + continue + a0 = float(col.data[idx]) + noise_val = 1.0 if row == noise_target else 0.0 + entry = _make_linear_poly(a0, + perturbation_strength * (noise_val - a0)) + if _poly_zero_test(entry): + continue + matrix.set(row, j, entry) + col_sum = col_sum + entry + processed_rows.add(row) + + if size > 1 and noise_target not in processed_rows and noise_target != j: + entry = _make_linear_poly(0.0, perturbation_strength) + if not _poly_zero_test(entry): + matrix.set(noise_target, j, entry) + col_sum = col_sum + entry + + matrix.set(j, j, _POLY_ONE - col_sum) + + return matrix + + # Dense case + matrix = np.zeros((size, size), dtype=object) + U_val = 1.0 / float(size) if size else 0.0 + for j in range(size): + for i in range(size): + a0 = float(base[i, j]) + noise_val = U_val + a1 = perturbation_strength * (noise_val - a0) + # base[i, j] + epsilon * (U_val - base[i, j]) + # = (1 - epsilon) * base[i, j] + epsilon * U_val + entry = _make_linear_poly(a0, a1) + if isinstance(entry, np.poly1d): + if 
_poly_zero_test(entry): + continue + elif abs(entry) <= 1e-16: + continue + matrix[i, j] = entry + + for j in range(size): + col_sum = _POLY_ZERO + for i in range(size): + if i == j: + continue + col_sum = col_sum + _ensure_poly1d(matrix[i, j]) + matrix[j, j] = _POLY_ONE - col_sum + + return matrix + + +def _construct_perturbed_markov_matrix_ev_dyn( + payoff_tables: List[Any], + payoffs_are_hpt_format: bool, + perturbation_strength: float = 1.0, + use_sparse: bool = False) -> Union[np.ndarray, sp.spmatrix, SparsePolyMatrix]: + """Constructs perturbed Markov matrix using evolutionary dynamics.""" + + game_info = _convert_alpharank_to_ssd_format( + payoff_tables, payoffs_are_hpt_format) + + num_populations = game_info["num_populations"] + + m = 10.0 + alpha = 50.0 + + if num_populations == 1: + game_is_constant_sum, payoff_sum = utils.check_is_constant_sum( + payoff_tables[0], payoffs_are_hpt_format) + # Use evolutionary dynamics as base with use_inf_alpha False + c_base, _ = alpharank._get_singlepop_transition_matrix( + payoff_tables[0], payoffs_are_hpt_format, + m=m, alpha=alpha, + game_is_constant_sum=game_is_constant_sum, + use_local_selection_model=True, + payoff_sum=payoff_sum, + use_inf_alpha=False, + inf_alpha_eps=0.0, + use_sparse=use_sparse) + # c_base is a (num_strats x num_strats) matrix with numeric entries. 
+ base = c_base if sp.issparse(c_base) else np.array(c_base, dtype=float) + else: + c_base, _ = alpharank._get_multipop_transition_matrix( + payoff_tables, payoffs_are_hpt_format, + m=m, alpha=alpha, + use_inf_alpha=False, + inf_alpha_eps=0.0, + use_sparse=use_sparse) + base = c_base if sp.issparse(c_base) else np.array(c_base, dtype=float) + + # arank: c[current, next] + # SSD: M[next, current] + base = base.transpose() if sp.issparse(base) else base.T + return _construct_polynomial_transition_matrix(base, perturbation_strength, + use_sparse) + + +def compute_ssd(payoff_tables: List[Any], + perturbation_strength: float = 1, + verbose: bool = False, + **kwargs) -> np.ndarray: + """Compute stochastically stable distribution for given payoff tables. + + Args: + payoff_tables: List of game payoff tables. + verbose: Set to True to print intermediate results. + + Returns: + Stochastically stable distribution as numpy array. + """ + payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables) + + + num_strats_per_population = utils.get_num_strats_per_population( + payoff_tables, payoffs_are_hpt_format) + if np.array_equal(num_strats_per_population, + np.ones(len(num_strats_per_population))): + rhos = np.asarray([[1]]) + return rhos + + if verbose: + print("Constructing perturbed Markov matrix") + print('num_strats_per_population:', num_strats_per_population) + use_sparse = bool(kwargs.get("use_sparse", False)) + matrix = _construct_perturbed_markov_matrix_ev_dyn( + payoff_tables, payoffs_are_hpt_format, perturbation_strength, + use_sparse=use_sparse) + + if verbose: + print(f"Matrix size: {matrix.shape}") + nnz, total = _matrix_nnz_and_total( + matrix, ignore_infinitesimal=use_sparse) + density = (nnz / total) if total else 0.0 + if verbose and use_sparse: + density_label = "non inf. 
density" if use_sparse else "density" + print("SSD matrix sparsity: {}/{} non-zero entries ({:.2f}% {}).".format( + nnz, total, density * 100.0, density_label)) + + ssd_distribution = _compute_ssd(matrix) + + if not _validate_ssd_distribution(ssd_distribution): + warnings.warn("ssd distribution validation failed, normalizing") + ssd_distribution = normalize(ssd_distribution) + + return ssd_distribution + diff --git a/open_spiel/python/egt/ssd_test.py b/open_spiel/python/egt/ssd_test.py new file mode 100644 index 0000000000..f3ffe52553 --- /dev/null +++ b/open_spiel/python/egt/ssd_test.py @@ -0,0 +1,82 @@ +"""Tests for open_spiel.python.egt.ssd.""" + +from absl.testing import absltest + +import numpy as np +import pyspiel + +from open_spiel.python.egt import alpharank +from open_spiel.python.egt import ssd +from open_spiel.python.egt import utils + + +class SSDTest(absltest.TestCase): + + def _get_game_payoffs(self, name): + game = pyspiel.load_matrix_game(name) + payoff_tables = utils.game_payoffs_array(game) + _, payoff_tables = utils.is_symmetric_matrix_game(payoff_tables) + return payoff_tables + + def test_matrix_game_distributions(self): + test_games = [ + ("matrix_rps", None), + ("matrix_pd", None), + ("matrix_coordination", None), + ("matrix_bos", None), + ("matrix_brps", None), + ("matrix_cd", None), + ("matrix_mp", None), + ("matrix_rpsw", None), + ("matrix_sh", None), + ("matrix_shapleys_game", None), + ] + for game_name, _ in test_games: + with self.subTest(game=game_name): + payoff_tables = self._get_game_payoffs(game_name) + ssd_dist = ssd.compute_ssd( + payoff_tables, + payoffs_are_hpt_format=False, + perturbation_strength=0.0001, + use_sparse=False) + _, _, alpharank_dist, _, _ = alpharank.compute( + payoff_tables, + m=20, + alpha=0.2) + print(f"Game: {game_name}") + print(" SSD distribution:", ssd_dist) + print(" AlphaRank distribution:", alpharank_dist) + + def test_reduce_logic(self): + for use_sparse in [False, True]: + with 
self.subTest(use_sparse=use_sparse): + if use_sparse: + matrix = ssd.SparsePolyMatrix((4, 4)) + matrix.set(1, 0, ssd._make_linear_poly(1.0, 0.0)) + matrix.set(2, 1, ssd._make_linear_poly(1.0, 0.0)) + matrix.set(0, 2, ssd._make_linear_poly(1.0, 0.0)) + matrix.set(0, 3, ssd._make_linear_poly(1.0, 0.0)) + + matrix.set(3, 0, ssd._make_linear_poly(0.0, 1.0)) # eps + matrix.set(1, 0, ssd._make_linear_poly(1.0, -1.0)) + + else: + matrix = np.zeros((4, 4), dtype=object) + one = np.poly1d([1]) + eps = np.poly1d([1, 0]) + one_minus_eps = np.poly1d([-1, 1]) + + matrix[1, 0] = one_minus_eps + matrix[3, 0] = eps + matrix[2, 1] = one + matrix[0, 2] = one + matrix[0, 3] = one + + dist = ssd._compute_ssd(matrix) + + expected = np.array([1/3, 1/3, 1/3, 0.0]) + np.testing.assert_allclose(dist, expected, atol=1e-6) + + +if __name__ == "__main__": + absltest.main() diff --git a/open_spiel/python/egt/utils.py b/open_spiel/python/egt/utils.py index 849fc39b0f..93c405f90f 100644 --- a/open_spiel/python/egt/utils.py +++ b/open_spiel/python/egt/utils.py @@ -483,7 +483,7 @@ def is_symmetric_matrix_game(payoff_tables): if payoffs_are_hpt_format and np.array_equal(payoff_tables[0](), payoff_tables[1]()): return True, [payoff_tables[0]] - elif ~payoffs_are_hpt_format and np.array_equal(payoff_tables[0], + elif not payoffs_are_hpt_format and np.array_equal(payoff_tables[0], payoff_tables[1].T): return True, [payoff_tables[0]] return False, payoff_tables diff --git a/open_spiel/python/examples/psro_v2_example.py b/open_spiel/python/examples/psro_v2_example.py index 10d8a140ca..4741f7ce0e 100644 --- a/open_spiel/python/examples/psro_v2_example.py +++ b/open_spiel/python/examples/psro_v2_example.py @@ -19,12 +19,13 @@ script with: - `game_name` in ['kuhn_poker', 'leduc_poker'] - `n_players` in [2, 3, 4, 5] - - `meta_strategy_method` in ['alpharank', 'uniform', 'nash', 'prd'] + - `meta_strategy_method` in ['alpharank', 'uniform', 'nash', 'prd', 'ssd'] - `rectifier` in ['', 'rectified'] The other 
parameters keeping their default values. """ +import threading import time from absl import app @@ -44,24 +45,100 @@ from open_spiel.python.algorithms.psro_v2 import rl_oracle from open_spiel.python.algorithms.psro_v2 import rl_policy from open_spiel.python.algorithms.psro_v2 import strategy_selectors +import sys +sys.setrecursionlimit(3000) + + +def get_memory_usage_mb(): + try: + import psutil + + rss = psutil.Process().memory_info().rss + return rss / (1024.0 * 1024.0) + except Exception: + try: + import resource + + rss_kb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + return rss_kb / 1024.0 + except Exception: + return -1.0 + + +class MemorySampler: + """Background sampler that tracks peak memory usage.""" + + def __init__(self, interval_s=0.05): + self._interval_s = max(interval_s, 0.0) + self._running = False + self._thread = None + self.max_mb = -1.0 + + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc, exc_tb): + self.stop() + + def start(self): + if self._running: + return + self._running = True + self._thread = threading.Thread(target=self._run, daemon=True) + self._thread.start() + + def stop(self): + if not self._running: + return + self._running = False + if self._thread is not None: + self._thread.join() + # Capture one final sample after stopping to include post-loop usage. + self._record_sample() + + def _run(self): + while self._running: + self._record_sample() + if self._interval_s == 0.0: + # Avoid busy spinning if configured interval is zero. 
+ time.sleep(0) + else: + time.sleep(self._interval_s) + + def _record_sample(self): + sample = get_memory_usage_mb() + if sample >= 0: + if self.max_mb < 0: + self.max_mb = sample + else: + self.max_mb = max(self.max_mb, sample) + return sample FLAGS = flags.FLAGS # Game-related -flags.DEFINE_string("game_name", "kuhn_poker", "Game name.") -flags.DEFINE_integer("n_players", 2, "The number of players.") +flags.DEFINE_string("game_name", "leduc_poker", "Game name.") +flags.DEFINE_integer("n_players", 3, "The number of players.") +flags.DEFINE_string( + "log_path", "", + "Optional file path for logging exploitabilities. Leave empty to disable file logging.") + +flags.DEFINE_bool("use_sparse", False, "whether to use scipy sparse matrices") + # PSRO related -flags.DEFINE_string("meta_strategy_method", "alpharank", - "Name of meta strategy computation method.") +flags.DEFINE_string( + "meta_strategy_method", "ssd", + "Name of meta strategy computation method") flags.DEFINE_integer("number_policies_selected", 1, "Number of new strategies trained at each PSRO iteration.") -flags.DEFINE_integer("sims_per_entry", 1000, +flags.DEFINE_integer("sims_per_entry", 100, ("Number of simulations to run to estimate each element" "of the game outcome matrix.")) -flags.DEFINE_integer("gpsro_iterations", 100, +flags.DEFINE_integer("gpsro_iterations", 70, "Number of training steps for GPSRO.") flags.DEFINE_bool("symmetric_game", False, "Whether to consider the current " "game as a symmetric game.") @@ -70,7 +147,7 @@ flags.DEFINE_string("rectifier", "", "Which rectifier to use. Choices are '' " "(No filtering), 'rectified' for rectified.") -flags.DEFINE_string("training_strategy_selector", "probabilistic", +flags.DEFINE_string("training_strategy_selector", "top_k_probabilities", "Which strategy selector to use. Choices are " " - 'top_k_probabilities': select top " "`number_policies_selected` strategies. 
" @@ -111,6 +188,10 @@ flags.DEFINE_integer("seed", 1, "Seed.") flags.DEFINE_bool("local_launch", False, "Launch locally or not.") flags.DEFINE_bool("verbose", True, "Enables verbose printing and profiling.") +flags.DEFINE_float( + "memory_sample_interval", 0.05, + "Seconds between asynchronous memory samples collected during each" + " gpsro iteration. Set to 0 for best-effort continuous sampling.") def init_pg_responder(env): @@ -254,22 +335,33 @@ def gpsro_looper(env, oracle, agents): prd_iterations=50000, prd_gamma=1e-10, sample_from_marginals=sample_from_marginals, - symmetric_game=FLAGS.symmetric_game) + symmetric_game=FLAGS.symmetric_game, + use_sparse=FLAGS.use_sparse) start_time = time.time() + max_memory_mb = -1.0 for gpsro_iteration in range(FLAGS.gpsro_iterations): - if FLAGS.verbose: - print("Iteration : {}".format(gpsro_iteration)) - print("Time so far: {}".format(time.time() - start_time)) - g_psro_solver.iteration() + iter_start = time.time() + sampler = MemorySampler(interval_s=FLAGS.memory_sample_interval) + sampler.start() + try: + g_psro_solver.iteration() + finally: + sampler.stop() + iter_end = time.time() + iter_time = iter_end - iter_start + memory_mb = get_memory_usage_mb() + iteration_peak_mb = sampler.max_mb if sampler.max_mb >= 0 else memory_mb + if iteration_peak_mb >= 0: + if max_memory_mb < 0: + max_memory_mb = iteration_peak_mb + else: + max_memory_mb = max(max_memory_mb, iteration_peak_mb) + meta_game = g_psro_solver.get_meta_game() meta_probabilities = g_psro_solver.get_meta_strategies() policies = g_psro_solver.get_policies() - if FLAGS.verbose: - print("Meta game : {}".format(meta_game)) - print("Probabilities : {}".format(meta_probabilities)) - # The following lines only work for sequential games for the moment. 
if env.game.get_type().dynamics == pyspiel.GameType.Dynamics.SEQUENTIAL: aggregator = policy_aggregator.PolicyAggregator(env.game) @@ -281,7 +373,34 @@ def gpsro_looper(env, oracle, agents): _ = print_policy_analysis(policies, env.game, FLAGS.verbose) if FLAGS.verbose: + print("Iteration : {}".format(gpsro_iteration)) + print("Iteration time (s): {:.6f}".format(iter_time)) + if memory_mb >= 0: + print("mb used: {:.2f}".format(memory_mb)) + else: + print("mb used: n/a") + if iteration_peak_mb >= 0: + print("peak iteration mb: {:.2f}".format(iteration_peak_mb)) + else: + print("peak iteration mb: n/a") + if max_memory_mb >= 0: + print("max mb so far: {:.2f}".format(max_memory_mb)) + else: + print("max mb so far: n/a") print("Exploitabilities : {}".format(exploitabilities)) + if FLAGS.log_path: + try: + with open(FLAGS.log_path, "a") as f: + f.write( + str(exploitabilities) + + "," + str(iter_time) + + "," + str(memory_mb) + + "," + str(iteration_peak_mb) + + "," + str(max_memory_mb) + + "\n") + except Exception as e: + if FLAGS.verbose: + print("Failed to write exploitabilities to {}: {}".format(FLAGS.log_path, e)) print("Exploitabilities per player : {}".format(expl_per_player)) diff --git a/open_spiel/python/games/pokerkit_wrapper.py b/open_spiel/python/games/pokerkit_wrapper.py index 432aea41ec..f36d047ec7 100644 --- a/open_spiel/python/games/pokerkit_wrapper.py +++ b/open_spiel/python/games/pokerkit_wrapper.py @@ -43,6 +43,11 @@ import pyspiel +if not hasattr(pyspiel, "pokerkit_wrapper"): + raise ImportError( + "no pokerkit ") + + # For documentation on using pokerkit see # https://pokerkit.readthedocs.io/en/0.4/reference.html, and most notably # https://pokerkit.readthedocs.io/en/0.4/reference.html#module-pokerkit.state