Skip to content

Commit b56c4ba

Browse files
committed
add EvalTournament and use case
1 parent 02fad97 commit b56c4ba

File tree

4 files changed

+13542
-1
lines changed

4 files changed

+13542
-1
lines changed

pydantic_evals/pydantic_evals/tournament.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
import math
44
import random
55
import textwrap
6+
from collections.abc import Awaitable, Callable
67
from enum import Enum
8+
from typing import Any
79

810
import choix
911
import numpy as np
@@ -318,3 +320,65 @@ async def adaptive_uncertainty_strategy(
318320
player.score = float(scores[i])
319321

320322
return players
323+
324+
325+
# Signature of a tournament algorithm: called with (players, game, agent, model_settings),
# it is awaited and yields the resulting list of players.
TournamentStrategy = Callable[[list[EvalPlayer], EvalGame, Agent, ModelSettings], Awaitable[list[EvalPlayer]]]
329+
330+
331+
class EvalTournament(BaseModel):
    """Tournament that evaluates a set of players through pairwise games."""

    game: EvalGame = Field(..., description='game to be played in the tournament')
    players: list[EvalPlayer] = Field(..., description='players participating in the tournament')

    def get_player_by_idx(self, idx: int) -> EvalPlayer:
        """Look up a player by its unique identifier.

        Args:
            idx: Unique identifier of the player to find.

        Returns:
            The first player in ``self.players`` whose ``idx`` equals the given one.

        Raises:
            ValueError: If no player carries the given identifier.
        """
        match = next((candidate for candidate in self.players if candidate.idx == idx), None)
        if match is None:
            raise ValueError(f'Player with unique identifier {idx} not found.')
        return match

    async def run(
        self,
        agent: Agent,
        model_settings: ModelSettings,
        strategy: TournamentStrategy | None = None,
        **strategy_kwargs: Any,
    ) -> list[EvalPlayer]:
        """Run the tournament by delegating to a strategy function.

        The strategy is responsible for sampling games, executing them and
        scoring the players; this method only dispatches to it and stores
        the list it returns.

        Args:
            agent: Agent for the evaluation game.
            model_settings: Model settings for the evaluation game.
            strategy: Function implementing the tournament algorithm. When
                omitted, ``adaptive_uncertainty_strategy`` is used.
            **strategy_kwargs: Extra keyword arguments forwarded unchanged to
                the strategy function.

        Returns:
            The list of players returned by the strategy, which is also
            stored on ``self.players``.
        """
        # Fall back to the default algorithm when the caller did not choose one.
        chosen_strategy = adaptive_uncertainty_strategy if strategy is None else strategy

        # The strategy's result replaces the tournament's current player list.
        self.players = await chosen_strategy(
            self.players,
            self.game,
            agent,
            model_settings,
            **strategy_kwargs,
        )
        return self.players

0 commit comments

Comments
 (0)