fix endgame solver timeout handling for also-solve-move

domino14 · claude · domino14 · commit d4d04a83b032 · 2026-03-09T00:42:50.000-04:00
This fixes a bug where the endgame solver would fail when using
SetAlsoSolveMove (used by the analyzer for volunteer mode) if the
main solve timed out before completing all requested plies.

The issue had three parts:

1. When the main solve timed out at (say) ply 7 after completing ply 6,
   it would try to solve the also-solve move using the expired context,
   which would immediately fail.

2. Even if it succeeded, the also-solve move would be solved to ply 7
   while the best move was only solved to ply 6, making results
   incomparable.

3. The best move from the partial solve wasn't saved to variations,
   so the also-solve move would incorrectly become the "best" move.

The fix:

- Track lastCompletedPly in all three parallel algorithms (ABDADA,
  LazySMP, TreeSplit) to know what depth was actually completed

- When solving also-solve move after a timeout, use the same completed
  ply depth as the best move to ensure comparable results

- Create a fresh 60-second timeout context specifically for solving
  the also-solve move, since the original context has expired

- Save the best move to variations before breaking on timeout, so it
  doesn't get lost when also-solve move is added to variations

This ensures both moves are solved to the same depth and results are
properly comparable for spread loss calculation.

Also updated endgame.txt helptext to document the timeout behavior.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
diff --git a/endgame/negamax/solver.go b/endgame/negamax/solver.go
@@ -239,6 +239,9 @@ type Solver struct {
 
 	alsoSolveMove tinymove.TinyMove
 
+	// Track the last successfully completed ply depth
+	lastCompletedPly int
+
 	// Metrics from last solve
 	lastSolveTime   float64
 	lastTTableStats string
@@ -526,6 +529,8 @@ func (s *Solver) iterativelyDeepenLazySMP(ctx context.Context, plies int) error
 			log.Err(err).Msg("lazySMP-possible-error")
 			return err
 		}
+		// Successfully completed this ply
+		s.lastCompletedPly = p
 		justWon := s.principalVariation.Moves[0]
 
 		if s.preventSlowroll {
@@ -689,6 +694,8 @@ func (s *Solver) iterativelyDeepenABDADA(ctx context.Context, plies int) error {
 			// Check if we're within the aspiration window
 			if val > aspα && val < aspβ {
 				lastIteration = val
+				// Successfully completed this ply
+				s.lastCompletedPly = p
 				break aspirationLoop
 			}
 
@@ -911,6 +918,9 @@ func (s *Solver) iterativelyDeepenTreeSplit(ctx context.Context, plies int) erro
 			s.bestPVValue = finalBest - int16(s.initialSpread)
 		}
 
+		// Successfully completed this ply
+		s.lastCompletedPly = p
+
 		nodes := s.nodes.Load()
 		log.Info().Int16("spread", finalBest-int16(s.initialSpread)).Int("ply", p).Str("pv", s.principalVariation.NLBString()).Uint64("total-nodes", nodes).Msg("best-val")
 	}
@@ -1145,6 +1155,12 @@ searchLoop:
 		err := algorithmFunc(ctx, plies)
 		if err != nil {
 			log.Err(err).Msg("algorithm-error-in-multiple-variations")
+			// Save the best move found so far before breaking
+			if s.principalVariation.numMoves > 0 {
+				s.variations = append(s.variations, s.principalVariation)
+				log.Info().Str("partial-result", s.principalVariation.NLBString()).
+					Msg("saved-partial-result-before-timeout")
+			}
 			break searchLoop
 		}
 
@@ -1181,11 +1197,29 @@ searchLoop:
 			}
 			if len(filtered) > 0 {
 				s.initialMoves[0] = filtered
-				err := algorithmFunc(ctx, plies)
+
+				// Use the same ply depth that was successfully completed for the best move
+				// to ensure comparable results
+				pliesToUse := plies
+				if s.lastCompletedPly > 0 && s.lastCompletedPly < plies {
+					pliesToUse = s.lastCompletedPly
+					log.Info().
+						Int("requested-plies", plies).
+						Int("using-plies", pliesToUse).
+						Msg("using-last-completed-ply-for-also-solve-move")
+				}
+
+				// Create fresh context with 1 minute timeout for solving just this move
+				freshCtx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
+				defer cancel()
+
+				err := algorithmFunc(freshCtx, pliesToUse)
 				if err == nil {
 					s.variations = append(s.variations, s.principalVariation)
 					log.Info().Str("also-solve-move", s.principalVariation.NLBString()).
 						Msg("solved-also-solve-var")
+				} else {
+					log.Warn().Err(err).Msg("failed-to-solve-also-solve-move")
 				}
 			}
 		}
@@ -1655,6 +1689,7 @@ func (s *Solver) Solve(ctx context.Context, plies int) (int16, []*move.Move, err
 	log.Debug().Int("plies", plies).Msg("alphabeta-solve-config")
 	s.requestedPlies = plies
 	s.variations = []PVLine{}
+	s.lastCompletedPly = 0
 
 	tstart := time.Now()
 	s.stmMovegen.SetSortingParameter(movegen.SortByNone)
diff --git a/shell/helptext/endgame.txt b/shell/helptext/endgame.txt
@@ -57,6 +57,10 @@ Options:
     iterative deepening by default, so a good enough solution should be found
     rapidly, and this solution will be improved upon.
 
+    Note: When using -also-solve-var (see below), the specified variant is solved
+    with its own fresh 60-second timeout, whether or not the main solve timed out.
+    If the main solve runs until maxtime, total time may be up to maxtime + 60 seconds.
+
     -disable-id true
 
     This option disables iterative deepening. Iterative Deepening allows the
@@ -127,4 +131,10 @@ Options:
 
     This mode will always solve the variant you specify, in addition to also finding
     the top play. This mode will tell you how far the optimal sequence beginning
-    with the specified move is from the optimal sequence.
+    with the specified move is from the optimal sequence.
+
+    Note: The specified variant will be solved to the same ply depth as the optimal
+    move to ensure comparable results. If the main solve times out before reaching
+    the requested plies, the variant will be solved to the last completed ply depth.
+    The variant solve gets a fresh 60-second timeout, so if the main solve reaches
+    maxtime, total solve time may be up to maxtime + 60 seconds.