Skip to content

Commit 7a14812

Browse files
luccabb and claude committed
[2/9] Fix transposition table with proper bounds
Implements correct transposition table behavior with bound types:

**Transposition Table Changes:**
- Add `Bound` enum: EXACT, LOWER_BOUND, UPPER_BOUND
- Use Zobrist hash as cache key (fast integer vs slow FEN string)
- Store bound type and depth with each cache entry
- Only use cached scores when depth is sufficient
- Properly handle bound types in lookups:
  - EXACT: use score directly
  - LOWER_BOUND: use if score >= beta (fail high)
  - UPPER_BOUND: use if score <= alpha (fail low)

**Null Move Pruning Fix:**
- Added missing `null_move` parameter check (was always trying null move)

**Parallel Engine Updates:**
- Update lazy_smp, l1p, l2p to use new Zobrist hash cache key
- Add context managers for Pool/Manager (proper resource cleanup)
- Fix score negation in l1p (opponent perspective -> our perspective)

Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent d7df55a commit 7a14812

File tree

4 files changed

+179
-147
lines changed

4 files changed

+179
-147
lines changed

moonfish/engines/alpha_beta.py

Lines changed: 64 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,30 @@
11
from copy import copy
2+
from enum import IntEnum
23
from multiprocessing.managers import DictProxy
3-
from typing import Dict, Optional, Tuple
4+
from typing import Dict, Optional, Tuple, Union
45

6+
import chess.polyglot
57
import chess.syzygy
68
from chess import Board, Move
79
from moonfish.config import Config
810
from moonfish.engines.random import choice
911
from moonfish.move_ordering import organize_moves, organize_moves_quiescence
1012
from moonfish.psqt import board_evaluation, count_pieces
1113

12-
CACHE_KEY = Dict[
13-
Tuple[str, int, bool, float, float], Tuple[float | int, Optional[Move]]
14-
]
14+
15+
class Bound(IntEnum):
16+
"""Transposition table bound types."""
17+
18+
EXACT = 0 # Score is exact (PV node, score was within alpha-beta window)
19+
LOWER_BOUND = 1 # Score is at least this value (failed high / beta cutoff)
20+
UPPER_BOUND = 2 # Score is at most this value (failed low)
21+
22+
23+
# Depth value for terminal positions (checkmate/stalemate) - always usable
24+
DEPTH_MAX = 10000
25+
26+
# Cache: zobrist_hash -> (score, best_move, bound_type, depth)
27+
CACHE_TYPE = Dict[int, Tuple[Union[float, int], Optional[Move], Bound, int]]
1528

1629

1730
class AlphaBeta:
@@ -123,7 +136,7 @@ def negamax(
123136
board: Board,
124137
depth: int,
125138
null_move: bool,
126-
cache: DictProxy | CACHE_KEY,
139+
cache: DictProxy | CACHE_TYPE,
127140
alpha: float = float("-inf"),
128141
beta: float = float("inf"),
129142
) -> Tuple[float | int, Optional[Move]]:
@@ -156,17 +169,36 @@ def negamax(
156169
Returns:
157170
- best_score, best_move: returns best move that it found and its value.
158171
"""
159-
cache_key = (board.fen(), depth, null_move, alpha, beta)
160-
# check if board was already evaluated
172+
original_alpha = alpha
173+
cache_key = chess.polyglot.zobrist_hash(board)
174+
175+
# Check transposition table
161176
if cache_key in cache:
162-
return cache[cache_key]
177+
cached_score, cached_move, cached_bound, cached_depth = cache[cache_key]
178+
179+
# Only use score if cached search was at least as deep as we need
180+
# Use cached result if:
181+
# - EXACT: score is exact
182+
# - LOWER_BOUND and score >= beta: true score is at least cached, causes cutoff
183+
# - UPPER_BOUND and score <= alpha: true score is at most cached, no improvement
184+
if cached_depth >= depth and (
185+
cached_bound == Bound.EXACT
186+
or (cached_bound == Bound.LOWER_BOUND and cached_score >= beta)
187+
or (cached_bound == Bound.UPPER_BOUND and cached_score <= alpha)
188+
):
189+
return cached_score, cached_move
163190

164191
if board.is_checkmate():
165-
cache[cache_key] = (-self.config.checkmate_score, None)
192+
cache[cache_key] = (
193+
-self.config.checkmate_score,
194+
None,
195+
Bound.EXACT,
196+
DEPTH_MAX,
197+
)
166198
return (-self.config.checkmate_score, None)
167199

168200
if board.is_stalemate():
169-
cache[cache_key] = (0, None)
201+
cache[cache_key] = (0, None, Bound.EXACT, DEPTH_MAX)
170202
return (0, None)
171203

172204
# recursion base case
@@ -178,12 +210,13 @@ def negamax(
178210
alpha=alpha,
179211
beta=beta,
180212
)
181-
cache[cache_key] = (board_score, None)
213+
cache[cache_key] = (board_score, None, Bound.EXACT, depth)
182214
return board_score, None
183215

184-
# null move prunning
216+
# null move pruning
185217
if (
186218
self.config.null_move
219+
and null_move
187220
and depth >= (self.config.null_move_r + 1)
188221
and not board.is_check()
189222
):
@@ -200,12 +233,11 @@ def negamax(
200233
)[0]
201234
board.pop()
202235
if board_score >= beta:
203-
cache[cache_key] = (beta, None)
236+
# Null move confirmed beta cutoff - this is a lower bound
237+
cache[cache_key] = (beta, None, Bound.LOWER_BOUND, depth)
204238
return beta, None
205239

206240
best_move = None
207-
208-
# initializing best_score
209241
best_score = float("-inf")
210242
moves = organize_moves(board)
211243

@@ -229,36 +261,38 @@ def negamax(
229261
# take move back
230262
board.pop()
231263

232-
# beta-cutoff
233-
if board_score >= beta:
234-
cache[cache_key] = (board_score, move)
235-
return board_score, move
236-
237264
# update best move
238265
if board_score > best_score:
239266
best_score = board_score
240267
best_move = move
241268

242-
# setting alpha variable to do pruning
243-
alpha = max(alpha, board_score)
269+
# beta-cutoff: opponent won't allow this position
270+
if best_score >= beta:
271+
# LOWER_BOUND: true score is at least best_score
272+
cache[cache_key] = (best_score, best_move, Bound.LOWER_BOUND, depth)
273+
return best_score, best_move
244274

245-
# alpha beta pruning when we already found a solution that is at least as
246-
# good as the current one those branches won't be able to influence the
247-
# final decision so we don't need to waste time analyzing them
248-
if alpha >= beta:
249-
break
275+
# update alpha
276+
alpha = max(alpha, best_score)
250277

251278
# if no best move, make a random one
252279
if not best_move:
253280
best_move = self.random_move(board)
254281

255-
# save result before returning
256-
cache[cache_key] = (best_score, best_move)
282+
# Determine bound type based on whether we improved alpha
283+
if best_score <= original_alpha:
284+
# Failed low: we didn't find anything better than what we already had
285+
bound = Bound.UPPER_BOUND
286+
else:
287+
# Score is exact: we found a score within the window
288+
bound = Bound.EXACT
289+
290+
cache[cache_key] = (best_score, best_move, bound, depth)
257291
return best_score, best_move
258292

259293
def search_move(self, board: Board) -> Move:
260294
# create shared cache
261-
cache: CACHE_KEY = {}
295+
cache: CACHE_TYPE = {}
262296

263297
best_move = self.negamax(
264298
board, copy(self.config.negamax_depth), self.config.null_move, cache

moonfish/engines/l1p_alpha_beta.py

Lines changed: 34 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -14,35 +14,38 @@ class Layer1ParallelAlphaBeta(AlphaBeta):
1414
def search_move(self, board: Board) -> Move:
1515
# start multiprocessing
1616
nprocs = cpu_count()
17-
pool = Pool(processes=nprocs)
18-
manager = Manager()
19-
shared_cache = manager.dict()
20-
21-
# creating list of moves at layer 1
22-
moves = list(board.legal_moves)
23-
arguments = []
24-
for move in moves:
25-
board.push(move)
26-
arguments.append(
27-
(
28-
copy(board),
29-
copy(self.config.negamax_depth) - 1,
30-
self.config.null_move,
31-
shared_cache,
17+
18+
with Pool(processes=nprocs) as pool, Manager() as manager:
19+
shared_cache = manager.dict()
20+
21+
# creating list of moves at layer 1
22+
moves = list(board.legal_moves)
23+
arguments = []
24+
for move in moves:
25+
board.push(move)
26+
arguments.append(
27+
(
28+
copy(board),
29+
copy(self.config.negamax_depth) - 1,
30+
self.config.null_move,
31+
shared_cache,
32+
)
3233
)
33-
)
34-
board.pop()
35-
36-
# executing all the moves at layer 1 in parallel
37-
# starmap blocks until all process are done
38-
processes = pool.starmap(self.negamax, arguments)
39-
results = []
40-
41-
# inserting move information in the results
42-
for i in range(len(processes)):
43-
results.append((*processes[i], moves[i]))
44-
45-
# sorting results and getting best move
46-
results.sort(key=lambda a: a[0])
47-
best_move = results[0][2]
48-
return best_move
34+
board.pop()
35+
36+
# executing all the moves at layer 1 in parallel
37+
# starmap blocks until all processes are done
38+
processes = pool.starmap(self.negamax, arguments)
39+
results = []
40+
41+
# inserting move information in the results
42+
# negamax returns (score, best_move) - we negate score since
43+
# it's from opponent's perspective
44+
for i in range(len(processes)):
45+
score = -processes[i][0] # Negate: opponent's -> our perspective
46+
results.append((score, processes[i][1], moves[i]))
47+
48+
# sorting results by score (descending) and getting best move
49+
results.sort(key=lambda a: a[0], reverse=True)
50+
best_move = results[0][2]
51+
return best_move

moonfish/engines/l2p_alpha_beta.py

Lines changed: 59 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -79,65 +79,65 @@ def search_move(self, board: Board) -> Move:
7979
START_LAYER = 2
8080
# start multiprocessing
8181
nprocs = cpu_count()
82-
pool = Pool(processes=nprocs)
83-
manager = Manager()
84-
shared_cache = manager.dict()
8582

86-
# pointer that help us in finding the best next move
87-
board_to_move_that_generates_it = manager.dict()
83+
with Pool(processes=nprocs) as pool, Manager() as manager:
84+
shared_cache = manager.dict()
85+
86+
# pointer that help us in finding the best next move
87+
board_to_move_that_generates_it = manager.dict()
88+
89+
# starting board list
90+
board_list = [(board, board, 0)]
91+
92+
# generating all possible boards for up to 2 moves ahead
93+
for _ in range(START_LAYER):
94+
arguments = [
95+
(board, board_to_move_that_generates_it, layer)
96+
for board, _, layer in board_list
97+
]
98+
processes = pool.starmap(self.generate_board_and_moves, arguments)
99+
board_list = [board for board in sum(processes, [])]
100+
101+
negamax_arguments = [
102+
(
103+
board,
104+
copy(self.config.negamax_depth) - START_LAYER,
105+
self.config.null_move,
106+
shared_cache,
107+
)
108+
for board, _, _ in board_list
109+
]
88110

89-
# starting board list
90-
board_list = [(board, board, 0)]
111+
parallel_layer_result = pool.starmap(self.negamax, negamax_arguments)
91112

92-
# generating all possible boards for up to 2 moves ahead
93-
for _ in range(START_LAYER):
94-
arguments = [
95-
(board, board_to_move_that_generates_it, layer)
96-
for board, _, layer in board_list
97-
]
98-
processes = pool.starmap(self.generate_board_and_moves, arguments)
99-
board_list = [board for board in sum(processes, [])]
100-
101-
negamax_arguments = [
102-
(
103-
board,
104-
copy(self.config.negamax_depth) - START_LAYER,
105-
self.config.null_move,
106-
shared_cache,
107-
)
108-
for board, _, _ in board_list
109-
]
110-
111-
parallel_layer_result = pool.starmap(self.negamax, negamax_arguments)
112-
113-
# grouping output based on the board that generates it
114-
groups = defaultdict(list)
115-
116-
# adding information about the board and layer
117-
# that generates the results and separating them
118-
# into groups based on the root board
119-
for i in range(len(parallel_layer_result)):
120-
groups[board_list[i][1].fen()].append(
121-
(*parallel_layer_result[i], board_list[i][0], board_list[i][2])
122-
)
123-
124-
best_boards = []
125-
126-
for group in groups.values():
127-
# layer and checkmate corrections
128-
# they are needed to adjust for
129-
# boards from different layers
130-
group = list(map(LAYER_SIGNAL_CORRECTION, group))
131-
group = list(map(self.checkmate_correction, group))
132-
# get best move from group
133-
group.sort(key=lambda a: a[0])
134-
best_boards.append(group[0])
135-
136-
# get best board
137-
best_boards.sort(key=lambda a: a[0], reverse=True)
138-
best_board = best_boards[0][2].fen()
139-
140-
# get move that results in best board
141-
best_move = board_to_move_that_generates_it[best_board]
142-
143-
return best_move
113+
# grouping output based on the board that generates it
114+
groups = defaultdict(list)
115+
116+
# adding information about the board and layer
117+
# that generates the results and separating them
118+
# into groups based on the root board
119+
for i in range(len(parallel_layer_result)):
120+
groups[board_list[i][1].fen()].append(
121+
(*parallel_layer_result[i], board_list[i][0], board_list[i][2])
122+
)
123+
124+
best_boards = []
125+
126+
for group in groups.values():
127+
# layer and checkmate corrections
128+
# they are needed to adjust for
129+
# boards from different layers
130+
group = list(map(LAYER_SIGNAL_CORRECTION, group))
131+
group = list(map(self.checkmate_correction, group))
132+
# get best move from group
133+
group.sort(key=lambda a: a[0])
134+
best_boards.append(group[0])
135+
136+
# get best board
137+
best_boards.sort(key=lambda a: a[0], reverse=True)
138+
best_board = best_boards[0][2].fen()
139+
140+
# get move that results in best board
141+
best_move = board_to_move_that_generates_it[best_board]
142+
143+
return best_move

0 commit comments

Comments
 (0)