|
1 | 1 | from __future__ import annotations as _annotations |
2 | 2 |
|
| 3 | +import math |
| 4 | +import random |
3 | 5 | import textwrap |
4 | 6 | from enum import Enum |
5 | 7 |
|
| 8 | +import choix |
| 9 | +import numpy as np |
6 | 10 | from pydantic import BaseModel, Field |
7 | 11 |
|
8 | 12 | from pydantic_ai import Agent |
@@ -98,3 +102,219 @@ async def run(self, players: tuple[EvalPlayer, EvalPlayer], agent: Agent[None, G |
98 | 102 | return (players[0].idx, players[1].idx) |
99 | 103 | else: |
100 | 104 | return (players[1].idx, players[0].idx) |
| 105 | + |
| 106 | + |
async def random_sampling_strategy(
    players: list[EvalPlayer],
    game: EvalGame,
    agent: Agent,
    model_settings: ModelSettings,
    fraction_of_games: float | None = None,
) -> list[EvalPlayer]:
    """Random sampling tournament strategy.

    In a tournament with n players, there are n*(n-1) possible pairwise games. We consider
    (i, j) and (j, i) as different games in order to ensure that the evaluation agent does
    not introduce any ordering bias. The strategy plays all possible games in random order.
    The strategy is simple and not efficient. But when all games are played, it returns the
    best possible scores.

    Args:
        players: List of players in the tournament.
        game: Game defining the pairwise comparisons.
        agent: Agent for the game.
        model_settings: Model settings for the game.
        fraction_of_games: Fraction of all possible games to be played. Must be in (0, 1];
            values outside that range (and None) play all games.

    Returns:
        List of players with Bradley-Terry scores.
    """
    scoreboard: list[tuple[int, int]] = []

    # Generate all ordered pairs (i, j) with i != j and play them in random order.
    n = len(players)
    matches = [(i, j) for i in range(n) for j in range(n) if i != j]
    random.shuffle(matches)
    if fraction_of_games is not None and 0 < fraction_of_games <= 1:
        # Truncate to the requested fraction; an out-of-range fraction is silently ignored.
        number_of_games = int(len(matches) * fraction_of_games)
        matches = matches[:number_of_games]

    # Play all selected games; each result is a (winner_idx, loser_idx) tuple.
    for first, second in matches:
        result = await game.run(
            players=(players[first], players[second]),
            agent=agent,
            model_settings=model_settings,
        )
        scoreboard.append(result)

    # Calculate Bradley-Terry scores from the outcomes and write them back onto the players.
    scores = choix.ilsr_pairwise(len(players), scoreboard, alpha=0.01)
    for i, player in enumerate(players):
        player.score = float(scores[i])

    return players
| 159 | + |
| 160 | + |
async def round_robin_strategy(
    players: list[EvalPlayer],
    game: EvalGame,
    agent: Agent,
    model_settings: ModelSettings,
    number_of_rounds: int = 2,
) -> list[EvalPlayer]:
    """Round-robin tournament strategy.

    Each player plays against a randomly selected opponent for a given number of rounds.
    The scores are calculated from the game outcomes using the Bradley-Terry algorithm.
    The strategy ensures that each player plays at least number_of_rounds games.
    The strategy is simple but not efficient.

    Args:
        players: List of players in the tournament. Must contain at least two players.
        game: Game defining the pairwise comparisons.
        agent: Agent for the game.
        model_settings: Model settings for the game.
        number_of_rounds: Number of rounds.

    Returns:
        List of players with Bradley-Terry scores.

    Raises:
        ValueError: If fewer than two players are given (no opponent can be drawn).
    """
    if len(players) < 2:
        # Without this guard the opponent-sampling loop below would never terminate.
        raise ValueError('round_robin_strategy requires at least two players')

    scoreboard: list[tuple[int, int]] = []

    for _ in range(number_of_rounds):
        for player in players:
            # Pick a random opponent; re-draw until it is not the player itself.
            # Identity comparison is used so this works even if player.idx values
            # do not match list positions.
            opponent = random.choice(players)
            while opponent is player:
                opponent = random.choice(players)

            # Play the game; result is a (winner_idx, loser_idx) tuple.
            result = await game.run(
                players=(player, opponent),
                agent=agent,
                model_settings=model_settings,
            )
            scoreboard.append(result)

    # Calculate Bradley-Terry scores from the outcomes and write them back onto the players.
    scores = choix.ilsr_pairwise(len(players), scoreboard, alpha=0.01)
    for i, player in enumerate(players):
        player.score = float(scores[i])

    return players
| 209 | + |
| 210 | + |
async def adaptive_uncertainty_strategy(
    players: list[EvalPlayer],
    game: EvalGame,
    agent: Agent,
    model_settings: ModelSettings,
    max_standard_deviation: float = 2.0,
    alpha: float = 0.1,
) -> list[EvalPlayer]:
    """Adaptive uncertainty tournament strategy.

    The strategy consists of two phases:
    (1) Bootstrap phase: The Bradley-Terry model requires the comparison graph to be strongly connected i.e.
    there must be a path between any two players. We therefore start by playing n/2*log(n) random games where
    n is the number of players. With fewer games, any scores are likely to be unreliable.
    (2) Optimization phase: In this phase, we iteratively calculate the Bradley-Terry scores and their
    covariance matrix, and play the game for which the player scores are the most uncertain.

    Let s_i and s_j be the Bradley-Terry scores of players i and j respectively. The uncertainty in their
    relative strength is then given by

        Var(s_i - s_j) = Var(s_i) + Var(s_j) - 2*Cov(s_i, s_j)

    We stop when the standard deviation sqrt(Var(s_i - s_j)) of the most uncertain pair drops below
    the threshold max_standard_deviation, or when all possible pairs have been played.

    Comment on max_standard_deviation parameter:
    Typically, a standard deviation below 1.0 is a good stopping condition. However, the uncertainty
    depends greatly on the evaluation problem. For a problem such as "Which of the following ice cream
    flavours is the most creative one? Vanilla or Chocolate or Strawberry?", the uncertainty will remain
    high even after many games.

    Comment on alpha parameter:
    The alpha parameter is the prior strength for the Bradley-Terry model. Higher alpha (e.g. 0.8) is a
    strong prior towards equal player strengths. The games have a smaller influence on the scores, and
    the scores remain close to the mean of 0. Lower alpha (e.g. 0.1) on the other hand lets the games
    influence the scores more strongly. However, for a sparse comparison graph, the scores can become
    less stable. Typical values are between 0.1 and 0.3.

    Args:
        players: List of players in the tournament.
        game: Game defining the pairwise comparisons.
        agent: Agent for the game.
        model_settings: Model settings for the game.
        max_standard_deviation: Maximum standard deviation for the most uncertain pair. See also above.
        alpha: Prior strength for the Bradley-Terry model. Between 0 and 1. See also above.

    Returns:
        List of players with Bradley-Terry scores.
    """
    scoreboard: list[tuple[int, int]] = []
    n = len(players)

    # (1) Bootstrap phase: play ~n/2*log(n) (at least 2n) random ordered games so that
    # the comparison graph is likely to be connected before the adaptive phase starts.
    number_of_bootstrap_games = max(2 * n, int(n / 2 * np.log(n)))
    matches = [(i, j) for i in range(n) for j in range(n) if i != j]
    random.shuffle(matches)
    for first, second in matches[:number_of_bootstrap_games]:
        result = await game.run(
            players=(players[first], players[second]),
            agent=agent,
            model_settings=model_settings,
        )
        scoreboard.append(result)

    # (2) Optimization phase: repeatedly play the as-yet-unplayed pair whose relative
    # strength is most uncertain. At most n*(n-1)/2 unordered pairs exist.
    max_number_of_games = n * (n - 1) // 2
    for _ in range(max_number_of_games):
        # Calculate the Bradley-Terry covariance matrix for the current scoreboard.
        _, cov_matrix = choix.ep_pairwise(n_items=n, data=scoreboard, alpha=alpha, model='logit')

        # Pairs already played, treated symmetrically: (a, b) counts the same as (b, a).
        # This is not quite correct (orderings are distinct games) but good enough, and
        # building the set once keeps the pair scan below O(n^2) instead of O(n^2 * games).
        played = {frozenset(pair) for pair in scoreboard}

        # Find the most uncertain pair which has not yet been played.
        max_uncertainty = -1.0
        next_pair: tuple[int, int] | None = None
        for i in range(n):
            for j in range(i + 1, n):
                if frozenset((players[i].idx, players[j].idx)) in played:
                    continue

                # Var(s_i - s_j) = Var(s_i) + Var(s_j) - 2*Cov(s_i, s_j)
                uncertainty = cov_matrix[i, i] + cov_matrix[j, j] - 2 * cov_matrix[i, j]
                if uncertainty > max_uncertainty:
                    max_uncertainty = uncertainty
                    next_pair = (i, j)

        # Terminate when all pairs are played or the residual uncertainty is small enough.
        # Clamp at 0 to guard against tiny negative variances from numerical error.
        if next_pair is None:
            break
        if math.sqrt(max(max_uncertainty, 0.0)) < max_standard_deviation:
            break

        # Play the most uncertain pair.
        result = await game.run(
            players=(players[next_pair[0]], players[next_pair[1]]),
            agent=agent,
            model_settings=model_settings,
        )
        scoreboard.append(result)

    # Final calculation of Bradley-Terry scores; write them back onto the players.
    scores, _ = choix.ep_pairwise(n_items=n, data=scoreboard, alpha=alpha, model='logit')
    for i, player in enumerate(players):
        player.score = float(scores[i])

    return players
0 commit comments