Commit 061b508

Adopt searchless_chess 1968-action vocabulary (#52)
* Adopt searchless_chess 1968-action vocabulary as primary token representation

  Replace PAWN's dense 4284-token vocabulary (64×64 grid + promotions + outcomes) with DeepMind's searchless_chess 1968-action vocabulary. Actions 0-1967 are 1:1 with searchless_chess — no offset, no remapping.

  New token layout:
  - 0-1967: searchless_chess actions (reachable moves only)
  - 1968: PAD
  - 1969-1979: outcome tokens (11 total)

  Key changes:
  - Generate engine/src/searchless_vocab.rs from canonical JSON via codegen script
  - Rewrite vocab.rs: lookup-table tokenization replaces formula-based approach
  - board.rs: move_to_token builds UCI string and looks up action index
  - batch.rs: PAD initialization uses 1968 instead of 0
  - Add pawn_to_searchless/searchless_to_pawn conversion functions
  - Update all test assertions for new token ranges

  All 304 Rust tests pass.

* Add prepend_outcome flag and fix PAD initialization in Rust engine

  - generate_clm_batch() gains prepend_outcome parameter (default false)
    - false: pure moves [m1, m2, ..., mN, PAD], max_ply = seq_len
    - true: [outcome, m1, ..., mN, PAD], max_ply = seq_len - 1
  - Fix PAD initialization: input_ids/targets use PAD_TOKEN (1968) instead of 0 (which is now action "a1b1")
  - Fix sparse legal mask PAD insertion to use vocab::PAD_TOKEN
  - Expose pawn_to_searchless/searchless_to_pawn to Python
  - Update all test assertions for new token ranges

  304 Rust tests pass. Python smoke test verified.

* Update Python config and model for searchless_chess vocabulary

  config.py:
  - New defaults: PAD_TOKEN=1968, OUTCOME_TOKEN_BASE=1969, vocab_size=1980, max_seq_len=512
  - Add LegacyVocab class for loading old checkpoints (vocab_size=4284)
  - TrainingConfig.max_ply defaults to 512 (matches seq_len)

  model.py:
  - CLMEmbedding derives pad_token, outcome_base, n_actions from cfg.vocab_size — works with both new (1980) and legacy (4284) vocab
  - _build_decomposition_table(n_actions) is parameterized: builds from engine vocab for new, from the old formula for legacy
  - Factored embeddings produce identical (src, dst, promo) decomposition for the same UCI move regardless of vocab

  Both PAWNCLM(CLMConfig()) and PAWNCLM(CLMConfig(vocab_size=4284)) produce correct output shapes and decomposition tables.

* Update Python data pipeline for searchless_chess vocabulary

  data.py:
  - pack_clm_sequences uses torch.full(PAD_TOKEN) instead of torch.zeros
  - CLMDataset passes prepend_outcome flag to engine (default false)
  - create_validation_set gains prepend_outcome parameter

  lichess_data.py:
  - LegalMaskBuilder default vocab_size: 4278 → 1968
  - LegalMaskCollate default vocab_size: 4278 → 1968
  - compute_legal_indices default vocab_size: 4278 → 1968

  generation.py:
  - Use model.cfg.vocab_size instead of the class default for legal mask size

* Update all tests and remaining Python code for searchless_chess vocabulary

  Tests (14 files, 1328 pass):
  - test_config.py: new constants (PAD=1968, outcomes at 1969+, vocab_size=1980)
  - test_model.py: decomp table shape [1968,3], token ranges [0,1967]
  - test_data.py: PAD=1968 in pack_clm_sequences assertions
  - test_clm_format.py: engine calls use prepend_outcome=True for outcome-format tests, new token ranges for all assertions
  - test_512_token.py: updated for new vocab + no-outcome default
  - test_rosa.py, test_specialized_clm.py: token range fixes
  - test_lichess_data.py: remove hardcoded vocab_size=4278
  - test_trainer.py: updated for new defaults

  Python source:
  - probes.py: no_outcome_token defaults to True (no stripping needed)
  - specialized_clm.py: handle PAD_TOKEN > vocab_size gracefully
  - generation.py: get_legal_token_masks_batch default vocab_size=1980
  - trainer.py: updated by background agent for new vocab defaults

* Add --legacy-vocab flag and resume validation to training scripts

  train.py:
  - PretrainConfig.max_seq_len defaults to 512 (was 256)
  - --legacy-vocab flag sets vocab_size=4284, max_seq_len=256
  - Resume path validates that checkpoint vocab_size matches the model

  train_all.py:
  - --legacy-vocab flag applies to all variant configs
  - max_ply derived from model_cfg.max_seq_len (512 or 256)

* Add backward compatibility tests and update CLAUDE.md

  tests/model/test_backward_compat.py (10 tests):
  - Old/new model instantiation and forward pass
  - Factored embedding equivalence across vocabs
  - pawn_to_searchless/searchless_to_pawn roundtrips
  - Impossible move conversion returns -1
  - CLM batch format verification for both vocab modes

  CLAUDE.md:
  - Token vocabulary: 1968 actions + 1 PAD + 11 outcomes = 1980
  - Sequence format: pure moves (512 tokens), outcome prefix optional
  - max_seq_len: 256 → 512
  - Document prepend_outcome flag and conversion functions

* Remove VOCAB_TRANSITION.md planning document

* Fix review issues: legacy decomp off-by-one, data corruption, legality metric

  Bug fixes:
  - Legacy decomp table: allocate n_actions+1 rows so token 4272 (last promo) is reachable. Fix clamp in forward() to use decomp_table.shape[0]-1. Add test for token 4272 vs 4271 aliasing.
  - strip_outcome_token: guard with `prepend_outcome` check to prevent silently dropping the first move when --no-outcome-token is used with the new default (prepend_outcome=False).
  - Legality metric: parameterize compute_legal_move_rate_from_preds by n_actions so legacy-vocab predictions use the correct grid index mapping. Cache grid index tensors per (n_actions, device).
  - --legacy-vocab: set config.max_seq_len=256 before the override block so it doesn't get clobbered to 512.
  - SpecializedCLM: derive padding_idx from vocab_size (0 for legacy, 1968 for new, None for toy).

  Performance:
  - move_to_token: use a stack [u8; 5] buffer instead of a String allocation. Eliminates ~1.3M heap allocs/sec on the hot game-generation path.

  Rust:
  - Add test_clm_batch_seq_len_consistency_no_outcome asserting max_ply == seq_len when prepend_outcome=false.

  305 Rust tests, 1339 Python tests pass.

* Pass n_actions to legality metric in train_all.py and backfill_metrics.py

  - train_all.py: pass model.embed.n_actions to compute_legal_move_rate_from_preds
  - backfill_metrics.py: pass model.embed.n_actions to compute_legal_move_rate
  - compute_legal_move_rate wrapper: forward the n_actions parameter
  - train.py: fix misleading comment on the --legacy-vocab max_seq_len override

* Deprecate no_outcome_token flag, fix stale docstring

  - CLMTrainer: emit DeprecationWarning when no_outcome_token is set, and stop passing it to CLMDataset/create_validation_set (it was a no-op since prepend_outcome defaults to False)
  - pack_clm_sequences docstring: remove the hardcoded "(256)" from seq_len
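The token layout described above can be summarized in a few constants. This is an illustrative Python sketch of the ranges the commit describes, not the project's actual config module; the constant names mirror those mentioned in the commit message.

```python
# Sketch of the new token layout (illustrative, not the project's code):
# actions 0..1967 map 1:1 to searchless_chess, then PAD, then 11 outcomes.
NUM_ACTIONS = 1968            # searchless_chess actions, IDs 0..1967
PAD_TOKEN = NUM_ACTIONS       # 1968
OUTCOME_BASE = PAD_TOKEN + 1  # 1969; outcome tokens occupy 1969..1979
NUM_OUTCOMES = 11
VOCAB_SIZE = NUM_ACTIONS + 1 + NUM_OUTCOMES  # 1980

def token_kind(tok: int) -> str:
    """Classify a token ID under the new layout."""
    if 0 <= tok < NUM_ACTIONS:
        return "action"
    if tok == PAD_TOKEN:
        return "pad"
    if OUTCOME_BASE <= tok < OUTCOME_BASE + NUM_OUTCOMES:
        return "outcome"
    raise ValueError(f"token {tok} outside vocab of size {VOCAB_SIZE}")
```

Because the layout is contiguous (actions, then PAD, then outcomes), all the range checks in the updated tests reduce to comparisons against these three boundaries.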
1 parent 805548c commit 061b508

40 files changed (+8360, -1078 lines)

CLAUDE.md

Lines changed: 6 additions & 5 deletions

@@ -10,7 +10,7 @@ pawn/
 ├── pawn/             # Core Python package
 │   ├── config.py     # CLMConfig (small/base/large), TrainingConfig
 │   ├── model.py      # PAWNCLM transformer (RMSNorm, SwiGLU, RoPE, factored embeddings)
-│   ├── data.py       # On-the-fly random game data pipeline
+│   ├── data.py       # On-the-fly random game data pipeline (prepend_outcome flag)
 │   ├── lichess_data.py # Lichess PGN data pipeline + legal mask computation
 │   ├── trainer.py    # Pretraining loop
 │   ├── gpu.py        # GPU auto-detection (compile/AMP/SDPA backend)
@@ -55,14 +55,15 @@ The only extras are GPU backends (`rocm` or `cu128`). Everything else (pytest, s
 - Uses rayon for parallel game generation (~43K games/sec, 150M+/hr)
 - PyO3 bindings expose `chess_engine` module to Python
 - Key functions: `generate_random_games()`, `parse_pgn_file()`, `compute_legal_token_masks_sparse()`, `extract_board_states()`, `export_move_vocabulary()`, `compute_accuracy_ceiling()`
+- `export_move_vocabulary()` returns 1,968-entry maps (searchless_chess compatible). Conversion functions `pawn_to_searchless()` and `searchless_to_pawn()` bridge between legacy PAWN token IDs and searchless_chess action indices.
 
 ## Model
 
 ### Architecture
-- Decoder-only transformer, next-token prediction over 4,278 tokens
-- Token vocabulary: 1 PAD + 4,096 grid (64x64 src/dst) + 176 promotions + 5 outcomes
+- Decoder-only transformer, next-token prediction over 1,968 move tokens (1,980 total vocab)
+- Token vocabulary: 1,968 searchless_chess actions (0-1967) + 1 PAD (1968) + 11 outcomes (1969-1979) = 1,980 total
 - Factored embeddings: `src_embed[s] + dst_embed[d] + promo_embed[p]`
-- Sequence format: `[outcome] [ply_1] ... [ply_N] [PAD] ... [PAD]` (256 tokens)
+- Sequence format: `[ply_1] ... [ply_N] [PAD] ... [PAD]` (512 tokens) — outcome prefix is optional via `prepend_outcome` flag
 
 ### Variants
 - `CLMConfig.small()`: d=256, 8 layers, 4 heads, ~9.5M params
@@ -373,7 +374,7 @@ Supports all adapter types + architecture search. GPU affinity assigns `CUDA_VIS
 - **DataLoader workers must use `multiprocessing_context='spawn'`** — the Rust engine uses rayon, and fork after rayon init causes deadlocks.
 - **`SDPA_BACKEND` must be set before `torch.compile()`** — compiled code captures the backend at trace time. `apply_gpu_config()` handles this.
 - **ROCm works**: The only known ROCm issue is a stride mismatch in flash attention backward when combined with `torch.compile` + AMP. The workaround is `--sdpa-math` (use the MATH SDPA backend instead of flash), which `configure_gpu()` applies automatically on AMD GPUs. Everything else — training, eval, adapters, data loading — works identically on ROCm and CUDA. **Do not assume bugs are ROCm-specific.** Every other time something has failed on AMD it turned out to be a bug in our code (wrong torch version installed, stale lockfile, missing dependency, etc.), not a ROCm issue.
-- **Sparse logit projection**: `forward_hidden()` returns `(B,T,d_model)`, then only loss-masked positions project through `lm_head` — avoids full `(B,T,V)` materialization.
+- **Sparse logit projection**: `forward_hidden()` returns `(B,T,d_model)`, then only loss-masked positions project through `lm_head` — avoids full `(B,T,1980)` materialization.
 - **Legal mask via Rust**: `LegalMaskBuilder` replays games in Rust, returns sparse indices (~2 MB) scattered into a pre-allocated GPU buffer (vs ~70 MB dense).
 - **GPU auto-detection**: `pawn.gpu.configure_gpu()` selects compile/AMP/SDPA settings. `apply_gpu_config()` applies them. NVIDIA uses flash attention + compile; AMD uses MATH SDPA + compile. Both paths are tested and production-validated.
 - **Factored embeddings**: each move token decomposes into `src_embed[s] + dst_embed[d] + promo_embed[p]`, reducing embedding parameters by ~32x.
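The factored-embedding bullet above can be illustrated with a small numpy sketch. The table sizes (64 squares for src and dst, 5 promotion slots including "no promotion") and d_model = 256 are assumptions for illustration, not the project's exact values; the real (src, dst, promo) decomposition table is built by the engine.

```python
import numpy as np

# Illustrative sketch of factored move embeddings: each move token
# decomposes into (src, dst, promo), and its embedding is the sum of
# one row from each of three small tables.
d_model = 256
rng = np.random.default_rng(0)
src_embed = rng.normal(size=(64, d_model))    # 64 source squares
dst_embed = rng.normal(size=(64, d_model))    # 64 destination squares
promo_embed = rng.normal(size=(5, d_model))   # assumed: none + N/B/R/Q

def embed_move(src: int, dst: int, promo: int) -> np.ndarray:
    # Factored lookup: three small-table rows summed, instead of one
    # row from a dense per-token table.
    return src_embed[src] + dst_embed[dst] + promo_embed[promo]

# Rough parameter saving vs a dense per-token table (legacy 4,278-token vocab):
factored_rows = 64 + 64 + 5   # 133 embedding rows total
dense_rows = 4278             # one row per token
saving = dense_rows / factored_rows  # roughly 32, matching the "~32x" note
```

The same factorization applies under the new 1,968-action vocabulary; only the decomposition table mapping token ID to (src, dst, promo) changes.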

engine/python/chess_engine/__init__.py

Lines changed: 4 additions & 0 deletions

@@ -38,6 +38,8 @@
     generate_engine_games_py as generate_engine_games,
     # Vocabulary
     export_move_vocabulary,
+    pawn_to_searchless,
+    searchless_to_pawn,
     # Interactive game state (for RL)
     PyGameState,
     PyBatchRLEnv,
@@ -73,6 +75,8 @@
     "pgn_to_uci",
     "generate_engine_games",
     "export_move_vocabulary",
+    "pawn_to_searchless",
+    "searchless_to_pawn",
     "PyGameState",
     "PyBatchRLEnv",
     "compute_accuracy_ceiling",

engine/src/batch.rs

Lines changed: 74 additions & 50 deletions

@@ -281,58 +281,71 @@ pub struct CLMBatch {
 
 /// Generate a CLM training batch: random games packed into model-ready format.
 ///
-/// `seq_len` is the total sequence length (256). Games are generated with up to
-/// `seq_len - 1` plies, leaving position 0 for the outcome token.
+/// When `prepend_outcome` is false (default), sequences are pure moves:
+/// `[move_1, move_2, ..., move_N, PAD, ...]` and `max_ply = seq_len`.
+///
+/// When `prepend_outcome` is true, position 0 is the outcome token:
+/// `[outcome, move_1, ..., move_N, PAD, ...]` and `max_ply = seq_len - 1`.
 pub fn generate_clm_batch(
     batch_size: usize,
     seq_len: usize,
     seed: u64,
     discard_ply_limit: bool,
     mate_boost: f64,
+    prepend_outcome: bool,
 ) -> CLMBatch {
-    let max_ply = seq_len - 1;
+    let max_ply = if prepend_outcome { seq_len - 1 } else { seq_len };
 
     let game_batch = {
         generate_random_games(batch_size, max_ply, seed, mate_boost, discard_ply_limit)
     };
 
-    let mut input_ids = vec![0i16; batch_size * seq_len];
-    let mut targets = vec![0i16; batch_size * seq_len];
+    let pad = vocab::PAD_TOKEN as i16;
+    let mut input_ids = vec![pad; batch_size * seq_len];
+    let mut targets = vec![pad; batch_size * seq_len];
     let mut loss_mask = vec![false; batch_size * seq_len];
 
     for b in 0..batch_size {
         let gl = game_batch.game_lengths[b] as usize;
-        let term = match game_batch.termination_codes[b] {
-            0 => Termination::Checkmate,
-            1 => Termination::Stalemate,
-            2 => Termination::SeventyFiveMoveRule,
-            3 => Termination::FivefoldRepetition,
-            4 => Termination::InsufficientMaterial,
-            _ => Termination::PlyLimit,
-        };
-        let outcome = vocab::termination_to_outcome(term, game_batch.game_lengths[b] as u16);
-
         let row = b * seq_len;
 
-        // Position 0: outcome token
-        input_ids[row] = outcome as i16;
+        if prepend_outcome {
+            // Outcome-prefixed format: [outcome, m1, ..., mN, PAD, ...]
+            let term = match game_batch.termination_codes[b] {
+                0 => Termination::Checkmate,
+                1 => Termination::Stalemate,
+                2 => Termination::SeventyFiveMoveRule,
+                3 => Termination::FivefoldRepetition,
+                4 => Termination::InsufficientMaterial,
+                _ => Termination::PlyLimit,
+            };
+            let outcome = vocab::termination_to_outcome(term, game_batch.game_lengths[b] as u16);
+            input_ids[row] = outcome as i16;
+
+            for t in 0..gl {
+                input_ids[row + 1 + t] = game_batch.move_ids[b * max_ply + t];
+            }
+
+            // Loss mask: positions 0..=gl are true
+            for t in 0..=gl {
+                loss_mask[row + t] = true;
+            }
+        } else {
+            // Pure moves format: [m1, m2, ..., mN, PAD, ...]
+            for t in 0..gl {
+                input_ids[row + t] = game_batch.move_ids[b * max_ply + t];
+            }
 
-        // Positions 1..=gl: move tokens
-        for t in 0..gl {
-            input_ids[row + 1 + t] = game_batch.move_ids[b * max_ply + t];
+            // Loss mask: positions 0..gl-1 are true (gl positions predict gl targets)
+            for t in 0..gl {
+                loss_mask[row + t] = true;
+            }
         }
-        // Remaining positions are already 0 (PAD)
 
         // Targets: input_ids shifted left by 1
        for t in 0..(seq_len - 1) {
            targets[row + t] = input_ids[row + t + 1];
        }
-        // targets[row + seq_len - 1] is already 0
-
-        // Loss mask: positions 0..=gl are true
-        for t in 0..=gl {
-            loss_mask[row + t] = true;
-        }
     }
 
     CLMBatch {
@@ -408,7 +421,7 @@ mod tests {
     #[test]
     fn test_clm_batch_format() {
         let seq_len = 256;
-        let batch = generate_clm_batch(8, seq_len, 42, false, 0.0);
+        let batch = generate_clm_batch(8, seq_len, 42, false, 0.0, true);
         assert_eq!(batch.input_ids.len(), 8 * seq_len);
         assert_eq!(batch.targets.len(), 8 * seq_len);
         assert_eq!(batch.loss_mask.len(), 8 * seq_len);
@@ -419,21 +432,23 @@
         let gl = batch.game_lengths[b] as usize;
         let row = b * seq_len;
 
-        // Position 0: outcome token (4273-4277)
+        let pad = vocab::PAD_TOKEN as i16;
+
+        // Position 0: outcome token
         let outcome = batch.input_ids[row];
         assert!(outcome >= vocab::OUTCOME_BASE as i16 && outcome <= vocab::PLY_LIMIT as i16,
             "Position 0 should be outcome token, got {}", outcome);
 
-        // Positions 1..=gl: move tokens (1-4272)
+        // Positions 1..=gl: move tokens (action IDs 0..1967)
         for t in 1..=gl {
             let tok = batch.input_ids[row + t];
-            assert!(tok >= 1 && tok <= 4272,
-                "Position {} should be move token, got {}", t, tok);
+            assert!(tok >= 0 && tok < vocab::NUM_ACTIONS as i16,
+                "Position {} should be move token (0-1967), got {}", t, tok);
         }
 
-        // Positions gl+1..seq_len: PAD (0)
+        // Positions gl+1..seq_len: PAD
         for t in (gl + 1)..seq_len {
-            assert_eq!(batch.input_ids[row + t], 0,
+            assert_eq!(batch.input_ids[row + t], pad,
                 "Position {} should be PAD, got {}", t, batch.input_ids[row + t]);
         }
 
@@ -442,10 +457,10 @@
         assert_eq!(batch.targets[row + t], batch.input_ids[row + t + 1],
             "targets[{}] should equal input_ids[{}]", t, t + 1);
         }
-        assert_eq!(batch.targets[row + seq_len - 1], 0, "Last target should be PAD");
+        assert_eq!(batch.targets[row + seq_len - 1], pad, "Last target should be PAD");
 
         // Target at position gl is PAD (end of game)
-        assert_eq!(batch.targets[row + gl], 0, "Target at game_length should be PAD");
+        assert_eq!(batch.targets[row + gl], pad, "Target at game_length should be PAD");
 
         // Loss mask: true for 0..=gl, false after
         for t in 0..=gl {
@@ -461,8 +476,8 @@
 
     #[test]
     fn test_clm_batch_deterministic() {
-        let b1 = generate_clm_batch(4, 256, 99, false, 0.0);
-        let b2 = generate_clm_batch(4, 256, 99, false, 0.0);
+        let b1 = generate_clm_batch(4, 256, 99, false, 0.0, true);
+        let b2 = generate_clm_batch(4, 256, 99, false, 0.0, true);
         assert_eq!(b1.input_ids, b2.input_ids);
         assert_eq!(b1.targets, b2.targets);
         assert_eq!(b1.loss_mask, b2.loss_mask);
@@ -471,7 +486,7 @@
 
     #[test]
     fn test_clm_batch_outcome_correctness() {
-        let batch = generate_clm_batch(32, 256, 42, false, 0.0);
+        let batch = generate_clm_batch(32, 256, 42, false, 0.0, true);
         for b in 0..32 {
             let gl = batch.game_lengths[b] as usize;
             let tc = batch.termination_codes[b];
@@ -503,8 +518,8 @@
         let gl = batch.game_lengths[b] as usize;
         for t in 0..gl {
             let tok = batch.move_ids[b * 64 + t];
-            // Tokens should be valid move tokens (1..=4272)
-            assert!(tok >= 1 && tok <= 4272,
+            // Tokens should be valid action IDs (0..1967)
+            assert!(tok >= 0 && tok < vocab::NUM_ACTIONS as i16,
                 "Invalid token at b={} t={}: {}", b, t, tok);
         }
     }
@@ -685,18 +700,27 @@
     }
 
     #[test]
-    fn test_clm_batch_seq_len_consistency() {
-        let batch = generate_clm_batch(4, 32, 42, false, 0.0);
+    fn test_clm_batch_seq_len_consistency_with_outcome() {
+        let batch = generate_clm_batch(4, 32, 42, false, 0.0, true);
         assert_eq!(batch.seq_len, 32);
-        assert_eq!(batch.max_ply, 31); // seq_len - 1
+        assert_eq!(batch.max_ply, 31); // seq_len - 1 when outcome prepended
         assert_eq!(batch.input_ids.len(), 4 * 32);
         assert_eq!(batch.move_ids.len(), 4 * 31);
     }
 
+    #[test]
+    fn test_clm_batch_seq_len_consistency_no_outcome() {
+        let batch = generate_clm_batch(4, 32, 42, false, 0.0, false);
+        assert_eq!(batch.seq_len, 32);
+        assert_eq!(batch.max_ply, 32); // max_ply == seq_len when no outcome
+        assert_eq!(batch.input_ids.len(), 4 * 32);
+        assert_eq!(batch.move_ids.len(), 4 * 32);
+    }
+
     #[test]
     fn test_clm_batch_shift_by_one() {
         // Verify targets[t] == input_ids[t+1] for all t < seq_len-1
-        let batch = generate_clm_batch(4, 64, 42, false, 0.0);
+        let batch = generate_clm_batch(4, 64, 42, false, 0.0, true);
         for b in 0..4 {
             let row = b * 64;
             for t in 0..63 {
@@ -708,7 +732,7 @@
 
     #[test]
     fn test_clm_batch_loss_mask_covers_outcome_and_moves() {
-        let batch = generate_clm_batch(4, 64, 42, false, 0.0);
+        let batch = generate_clm_batch(4, 64, 42, false, 0.0, true);
         for b in 0..4 {
             let gl = batch.game_lengths[b] as usize;
             let row = b * 64;
@@ -726,7 +750,7 @@
     #[test]
     fn test_clm_batch_move_ids_copied_correctly() {
         // move_ids in CLMBatch is the raw game moves (seq_len-1 wide)
-        let batch = generate_clm_batch(2, 32, 42, false, 0.0);
+        let batch = generate_clm_batch(2, 32, 42, false, 0.0, true);
         for b in 0..2 {
             let gl = batch.game_lengths[b] as usize;
             let max_ply = 31;
@@ -794,16 +818,16 @@
 
     #[test]
     fn test_generate_clm_batch_mate_boost_deterministic() {
-        let b1 = generate_clm_batch(4, 64, 42, false, 0.5);
-        let b2 = generate_clm_batch(4, 64, 42, false, 0.5);
+        let b1 = generate_clm_batch(4, 64, 42, false, 0.5, true);
+        let b2 = generate_clm_batch(4, 64, 42, false, 0.5, true);
        assert_eq!(b1.input_ids, b2.input_ids);
        assert_eq!(b1.game_lengths, b2.game_lengths);
     }
 
     #[test]
     fn test_clm_batch_discard_no_ply_limit_outcomes() {
         // With discard_ply_limit=true, outcome token at pos 0 is never PLY_LIMIT (4277)
-        let batch = generate_clm_batch(8, 40, 42, true, 0.0);
+        let batch = generate_clm_batch(8, 40, 42, true, 0.0, true);
         for b in 0..8 {
             let outcome = batch.input_ids[b * 40] as u16;
             assert_ne!(outcome, vocab::PLY_LIMIT,
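The packing semantics of `generate_clm_batch` can be mirrored in a short Python sketch (illustration only; the real implementation is the Rust above). It shows both sequence formats and the shift-by-one target and loss-mask invariants that the tests assert.

```python
# Sketch of the CLM batch packing for a single game, under the new
# token layout (actions 0..1967, PAD = 1968). Illustrative, not the
# project's actual Python code.
PAD = 1968

def pack_game(moves, outcome, seq_len, prepend_outcome):
    """Pack one game into (input_ids, targets, loss_mask)."""
    ids = [PAD] * seq_len
    if prepend_outcome:
        ids[0] = outcome                  # position 0: outcome token
        ids[1:1 + len(moves)] = moves     # positions 1..=gl: moves
        mask_len = len(moves) + 1         # loss on positions 0..=gl
    else:
        ids[:len(moves)] = moves          # pure-moves format
        mask_len = len(moves)             # loss on positions 0..gl-1
    # Targets are input_ids shifted left by one; the last target is PAD.
    targets = ids[1:] + [PAD]
    loss_mask = [t < mask_len for t in range(seq_len)]
    return ids, targets, loss_mask
```

With `prepend_outcome=True` the game may span at most `seq_len - 1` plies (position 0 is the outcome), which is exactly why `max_ply` depends on the flag in the Rust code.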
