[2/9] Fix transposition table with proper bounds

luccabb · claude · luccabb · commit 7a14812a6a1b · 2026-01-20T22:43:48.000-08:00
Implements correct transposition table behavior with bound types:

**Transposition Table Changes:**
- Add `Bound` enum: EXACT, LOWER_BOUND, UPPER_BOUND
- Use Zobrist hash as cache key (fast integer vs slow FEN string)
- Store bound type and depth with each cache entry
- Only use cached scores when depth is sufficient
- Properly handle bound types in lookups:
  - EXACT: use score directly
  - LOWER_BOUND: use if score >= beta (fail high)
  - UPPER_BOUND: use if score <= alpha (fail low)

**Null Move Pruning Fix:**
- Add the missing `null_move` parameter check (null move was previously always attempted)

**Parallel Engine Updates:**
- Update lazy_smp, l1p, l2p to use new zobrist hash cache key
- Add context managers for Pool/Manager (proper resource cleanup)
- Fix score negation in l1p (opponent perspective -> our perspective)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/moonfish/engines/alpha_beta.py b/moonfish/engines/alpha_beta.py
@@ -1,17 +1,30 @@
 from copy import copy
+from enum import IntEnum
 from multiprocessing.managers import DictProxy
-from typing import Dict, Optional, Tuple
+from typing import Dict, Optional, Tuple, Union
 
+import chess.polyglot
 import chess.syzygy
 from chess import Board, Move
 from moonfish.config import Config
 from moonfish.engines.random import choice
 from moonfish.move_ordering import organize_moves, organize_moves_quiescence
 from moonfish.psqt import board_evaluation, count_pieces
 
-CACHE_KEY = Dict[
-    Tuple[str, int, bool, float, float], Tuple[float | int, Optional[Move]]
-]
+
+class Bound(IntEnum):
+    """Transposition table bound types."""
+
+    EXACT = 0  # Score is exact (PV node, score was within alpha-beta window)
+    LOWER_BOUND = 1  # Score is at least this value (failed high / beta cutoff)
+    UPPER_BOUND = 2  # Score is at most this value (failed low)
+
+
+# Depth value for terminal positions (checkmate/stalemate) - always usable
+DEPTH_MAX = 10000
+
+# Cache: zobrist_hash -> (score, best_move, bound_type, depth)
+CACHE_TYPE = Dict[int, Tuple[Union[float, int], Optional[Move], Bound, int]]
 
 
 class AlphaBeta:
@@ -123,7 +136,7 @@ def negamax(
         board: Board,
         depth: int,
         null_move: bool,
-        cache: DictProxy | CACHE_KEY,
+        cache: DictProxy | CACHE_TYPE,
         alpha: float = float("-inf"),
         beta: float = float("inf"),
     ) -> Tuple[float | int, Optional[Move]]:
@@ -156,17 +169,36 @@ def negamax(
         Returns:
             - best_score, best_move: returns best move that it found and its value.
         """
-        cache_key = (board.fen(), depth, null_move, alpha, beta)
-        # check if board was already evaluated
+        original_alpha = alpha
+        cache_key = chess.polyglot.zobrist_hash(board)
+
+        # Check transposition table
         if cache_key in cache:
-            return cache[cache_key]
+            cached_score, cached_move, cached_bound, cached_depth = cache[cache_key]
+
+            # Only use score if cached search was at least as deep as we need
+            # Use cached result if:
+            # - EXACT: score is exact
+            # - LOWER_BOUND and score >= beta: true score is at least cached, causes cutoff
+            # - UPPER_BOUND and score <= alpha: true score is at most cached, no improvement
+            if cached_depth >= depth and (
+                cached_bound == Bound.EXACT
+                or (cached_bound == Bound.LOWER_BOUND and cached_score >= beta)
+                or (cached_bound == Bound.UPPER_BOUND and cached_score <= alpha)
+            ):
+                return cached_score, cached_move
 
         if board.is_checkmate():
-            cache[cache_key] = (-self.config.checkmate_score, None)
+            cache[cache_key] = (
+                -self.config.checkmate_score,
+                None,
+                Bound.EXACT,
+                DEPTH_MAX,
+            )
             return (-self.config.checkmate_score, None)
 
         if board.is_stalemate():
-            cache[cache_key] = (0, None)
+            cache[cache_key] = (0, None, Bound.EXACT, DEPTH_MAX)
             return (0, None)
 
         # recursion base case
@@ -178,12 +210,13 @@ def negamax(
                 alpha=alpha,
                 beta=beta,
             )
-            cache[cache_key] = (board_score, None)
+            cache[cache_key] = (board_score, None, Bound.EXACT, depth)
             return board_score, None
 
-        # null move prunning
+        # null move pruning
         if (
             self.config.null_move
+            and null_move
             and depth >= (self.config.null_move_r + 1)
             and not board.is_check()
         ):
@@ -200,12 +233,11 @@ def negamax(
                 )[0]
                 board.pop()
                 if board_score >= beta:
-                    cache[cache_key] = (beta, None)
+                    # Null move confirmed beta cutoff - this is a lower bound
+                    cache[cache_key] = (beta, None, Bound.LOWER_BOUND, depth)
                     return beta, None
 
         best_move = None
-
-        # initializing best_score
         best_score = float("-inf")
         moves = organize_moves(board)
 
@@ -229,36 +261,38 @@ def negamax(
             # take move back
             board.pop()
 
-            # beta-cutoff
-            if board_score >= beta:
-                cache[cache_key] = (board_score, move)
-                return board_score, move
-
             # update best move
             if board_score > best_score:
                 best_score = board_score
                 best_move = move
 
-            # setting alpha variable to do pruning
-            alpha = max(alpha, board_score)
+            # beta-cutoff: opponent won't allow this position
+            if best_score >= beta:
+                # LOWER_BOUND: true score is at least best_score
+                cache[cache_key] = (best_score, best_move, Bound.LOWER_BOUND, depth)
+                return best_score, best_move
 
-            # alpha beta pruning when we already found a solution that is at least as
-            # good as the current one those branches won't be able to influence the
-            # final decision so we don't need to waste time analyzing them
-            if alpha >= beta:
-                break
+            # update alpha
+            alpha = max(alpha, best_score)
 
         # if no best move, make a random one
         if not best_move:
             best_move = self.random_move(board)
 
-        # save result before returning
-        cache[cache_key] = (best_score, best_move)
+        # Determine bound type based on whether we improved alpha
+        if best_score <= original_alpha:
+            # Failed low: we didn't find anything better than what we already had
+            bound = Bound.UPPER_BOUND
+        else:
+            # Score is exact: we found a score within the window
+            bound = Bound.EXACT
+
+        cache[cache_key] = (best_score, best_move, bound, depth)
         return best_score, best_move
 
     def search_move(self, board: Board) -> Move:
         # create shared cache
-        cache: CACHE_KEY = {}
+        cache: CACHE_TYPE = {}
 
         best_move = self.negamax(
             board, copy(self.config.negamax_depth), self.config.null_move, cache
diff --git a/moonfish/engines/l1p_alpha_beta.py b/moonfish/engines/l1p_alpha_beta.py
@@ -14,35 +14,38 @@ class Layer1ParallelAlphaBeta(AlphaBeta):
     def search_move(self, board: Board) -> Move:
         # start multiprocessing
         nprocs = cpu_count()
-        pool = Pool(processes=nprocs)
-        manager = Manager()
-        shared_cache = manager.dict()
-
-        # creating list of moves at layer 1
-        moves = list(board.legal_moves)
-        arguments = []
-        for move in moves:
-            board.push(move)
-            arguments.append(
-                (
-                    copy(board),
-                    copy(self.config.negamax_depth) - 1,
-                    self.config.null_move,
-                    shared_cache,
+
+        with Pool(processes=nprocs) as pool, Manager() as manager:
+            shared_cache = manager.dict()
+
+            # creating list of moves at layer 1
+            moves = list(board.legal_moves)
+            arguments = []
+            for move in moves:
+                board.push(move)
+                arguments.append(
+                    (
+                        copy(board),
+                        copy(self.config.negamax_depth) - 1,
+                        self.config.null_move,
+                        shared_cache,
+                    )
                 )
-            )
-            board.pop()
-
-        # executing all the moves at layer 1 in parallel
-        # starmap blocks until all process are done
-        processes = pool.starmap(self.negamax, arguments)
-        results = []
-
-        # inserting move information in the results
-        for i in range(len(processes)):
-            results.append((*processes[i], moves[i]))
-
-        # sorting results and getting best move
-        results.sort(key=lambda a: a[0])
-        best_move = results[0][2]
-        return best_move
+                board.pop()
+
+            # executing all the moves at layer 1 in parallel
+            # starmap blocks until all processes are done
+            processes = pool.starmap(self.negamax, arguments)
+            results = []
+
+            # inserting move information in the results
+            # negamax returns (score, best_move) - we negate score since
+            # it's from opponent's perspective
+            for i in range(len(processes)):
+                score = -processes[i][0]  # Negate: opponent's -> our perspective
+                results.append((score, processes[i][1], moves[i]))
+
+            # sorting results by score (descending) and getting best move
+            results.sort(key=lambda a: a[0], reverse=True)
+            best_move = results[0][2]
+            return best_move
diff --git a/moonfish/engines/l2p_alpha_beta.py b/moonfish/engines/l2p_alpha_beta.py
@@ -79,65 +79,65 @@ def search_move(self, board: Board) -> Move:
         START_LAYER = 2
         # start multiprocessing
         nprocs = cpu_count()
-        pool = Pool(processes=nprocs)
-        manager = Manager()
-        shared_cache = manager.dict()
 
-        # pointer that help us in finding the best next move
-        board_to_move_that_generates_it = manager.dict()
+        with Pool(processes=nprocs) as pool, Manager() as manager:
+            shared_cache = manager.dict()
+
+            # pointer that help us in finding the best next move
+            board_to_move_that_generates_it = manager.dict()
+
+            # starting board list
+            board_list = [(board, board, 0)]
+
+            # generating all possible boards for up to 2 moves ahead
+            for _ in range(START_LAYER):
+                arguments = [
+                    (board, board_to_move_that_generates_it, layer)
+                    for board, _, layer in board_list
+                ]
+                processes = pool.starmap(self.generate_board_and_moves, arguments)
+                board_list = [board for board in sum(processes, [])]
+
+            negamax_arguments = [
+                (
+                    board,
+                    copy(self.config.negamax_depth) - START_LAYER,
+                    self.config.null_move,
+                    shared_cache,
+                )
+                for board, _, _ in board_list
+            ]
 
-        # starting board list
-        board_list = [(board, board, 0)]
+            parallel_layer_result = pool.starmap(self.negamax, negamax_arguments)
 
-        # generating all possible boards for up to 2 moves ahead
-        for _ in range(START_LAYER):
-            arguments = [
-                (board, board_to_move_that_generates_it, layer)
-                for board, _, layer in board_list
-            ]
-            processes = pool.starmap(self.generate_board_and_moves, arguments)
-            board_list = [board for board in sum(processes, [])]
-
-        negamax_arguments = [
-            (
-                board,
-                copy(self.config.negamax_depth) - START_LAYER,
-                self.config.null_move,
-                shared_cache,
-            )
-            for board, _, _ in board_list
-        ]
-
-        parallel_layer_result = pool.starmap(self.negamax, negamax_arguments)
-
-        # grouping output based on the  board that generates it
-        groups = defaultdict(list)
-
-        # adding information about the board and layer
-        # that generates the results and separating them
-        # into groups based on the root board
-        for i in range(len(parallel_layer_result)):
-            groups[board_list[i][1].fen()].append(
-                (*parallel_layer_result[i], board_list[i][0], board_list[i][2])
-            )
-
-        best_boards = []
-
-        for group in groups.values():
-            # layer and checkmate corrections
-            # they are needed to adjust for
-            # boards from different layers
-            group = list(map(LAYER_SIGNAL_CORRECTION, group))
-            group = list(map(self.checkmate_correction, group))
-            # get best move from group
-            group.sort(key=lambda a: a[0])
-            best_boards.append(group[0])
-
-        # get best board
-        best_boards.sort(key=lambda a: a[0], reverse=True)
-        best_board = best_boards[0][2].fen()
-
-        # get move that results in best board
-        best_move = board_to_move_that_generates_it[best_board]
-
-        return best_move
+            # grouping output based on the board that generates it
+            groups = defaultdict(list)
+
+            # adding information about the board and layer
+            # that generates the results and separating them
+            # into groups based on the root board
+            for i in range(len(parallel_layer_result)):
+                groups[board_list[i][1].fen()].append(
+                    (*parallel_layer_result[i], board_list[i][0], board_list[i][2])
+                )
+
+            best_boards = []
+
+            for group in groups.values():
+                # layer and checkmate corrections
+                # they are needed to adjust for
+                # boards from different layers
+                group = list(map(LAYER_SIGNAL_CORRECTION, group))
+                group = list(map(self.checkmate_correction, group))
+                # get best move from group
+                group.sort(key=lambda a: a[0])
+                best_boards.append(group[0])
+
+            # get best board
+            best_boards.sort(key=lambda a: a[0], reverse=True)
+            best_board = best_boards[0][2].fen()
+
+            # get move that results in best board
+            best_move = board_to_move_that_generates_it[best_board]
+
+            return best_move
diff --git a/moonfish/engines/lazy_smp.py b/moonfish/engines/lazy_smp.py