**Status:** Open | **Priority:** High | **Opened:** 2026/02/21
**Description:**
We currently lack explainable AI (XAI) tooling to verify that the agent's distinct sensor cortices (Visual, Depth, Thermal) are specializing as intended. We need visual proof that the Visual Cortex focuses on static geometry while the Thermal Cortex tracks dynamic threats.
**Proposed Solution:**
Integrate the `captum` library to generate Gradient-weighted Class Activation Mapping (Grad-CAM) heatmaps. Create an `examine` command that takes a single sequence from the dataset, runs `captum.attr.LayerGradCam` on the final convolutional layers of the respective cortices, and saves the upsampled heatmaps as side-by-side `.png` files. This will allow us to visually inspect the spatial stimuli responsible for triggering specific action logits.
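The computation `captum.attr.LayerGradCam` performs can be sketched in plain PyTorch to show what the `examine` command would produce per cortex. The `TinyCortex` model, layer choice, and target index below are illustrative stand-ins, not the real cortex architecture:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyCortex(nn.Module):
    """Toy stand-in for one sensor cortex: a small CNN with an action head."""
    def __init__(self, n_actions=4):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 8, 3, padding=1), nn.ReLU(),
            nn.Conv2d(8, 16, 3, padding=1), nn.ReLU(),
        )
        self.head = nn.Linear(16, n_actions)

    def forward(self, x):
        feats = self.conv(x)             # (B, 16, H, W)
        pooled = feats.mean(dim=(2, 3))  # global average pool
        return self.head(pooled)

def grad_cam(model, layer, x, target):
    """Grad-CAM for one input: ReLU(sum_k w_k * A_k), upsampled to input size."""
    acts, grads = {}, {}
    h1 = layer.register_forward_hook(lambda m, i, o: acts.update(a=o))
    h2 = layer.register_full_backward_hook(lambda m, gi, go: grads.update(g=go[0]))
    try:
        logits = model(x)
        model.zero_grad()
        logits[0, target].backward()     # gradient of one action logit
    finally:
        h1.remove()
        h2.remove()
    weights = grads["g"].mean(dim=(2, 3), keepdim=True)  # channel importance
    cam = F.relu((weights * acts["a"]).sum(dim=1, keepdim=True))
    return F.interpolate(cam, size=x.shape[-2:], mode="bilinear",
                         align_corners=False)

model = TinyCortex()
x = torch.rand(1, 3, 32, 32)             # one frame from a sequence
heatmap = grad_cam(model, model.conv[2], x, target=1)
```

The resulting `heatmap` tensor is what would be colormapped and written out as the side-by-side `.png` panels.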
docs/board/issues.md (2 additions, 14 deletions)
@@ -14,7 +14,7 @@ The `DoomStreamingDataset` currently applies dynamic NumPy transposition, mirror
Refactor the `DataLoader` initialization in `train.py` to offload ETL transformations to background processes. Implement `num_workers` (e.g., 4), enable `pin_memory=True` for faster Host-to-Device memory transfers, and establish a `prefetch_factor`.
**Status:** Open | **Priority:** Medium | **Opened:** 2026/02/21
@@ -26,19 +26,7 @@ In `dataset.py`, `DoomStreamingDataset` currently iterates through all `.npz` fi
Migrate the storage backend from compressed `.npz` archives to HDF5 (`h5py`) format, or utilize NumPy's `mmap_mode='r'` to memory-map the data on disk. This allows the `Dataset` to lazily stream tensor blocks directly from the NVMe/SSD without pre-loading the entire corpus into volatile memory.
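The `mmap_mode='r'` option of the migration can be sketched as follows. The episode file name is hypothetical, and note that memory-mapping applies to uncompressed `.npy` files; compressed `.npz` archives are decompressed wholesale, which is exactly the behavior being migrated away from:

```python
import os
import tempfile
import numpy as np

# Hypothetical episode file; real recordings live wherever record.py writes them.
path = os.path.join(tempfile.mkdtemp(), "episode_000.npy")
np.save(path, np.random.rand(100, 3, 32, 32).astype(np.float32))

# mmap_mode='r' maps the array on disk; only touched slices are paged into RAM.
episode = np.load(path, mmap_mode="r")
window = np.asarray(episode[10:14])  # materialize one 4-frame block
```

An HDF5 backend via `h5py` would offer the same lazy-slicing access pattern with optional chunked compression.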
docs/board/roadmap.md (30 additions, 0 deletions)
@@ -138,6 +138,36 @@ See [Phase Archive](./closed/phases.md) for the project's completed phases.
**Assessment**: Acceptable, if gated behind a configuration property that is disabled by default. Cleared to implement.
## Phase 10: Cortical Auxiliary Heads (Isolated Representation Learning)
*Goal:* Attach secondary linear heads directly to the latent output vectors of specific cortices (e.g., Thermal, Visual) prior to sensorimotor concatenation. This enables the application of targeted, isolated loss functions (e.g., BCE for enemy counting on the thermal mask) directly to the sub-networks, accelerating feature extraction without waiting for the slow, end-to-end action gradient.
### 1. Configuration Layer
- [ ] **Auxiliary Toggles:** Update `app.yaml` to include an `auxiliary_heads` configuration block under `brain` (e.g., toggling thermal enemy counting) and corresponding $\lambda$ weights under the `loss` block.
- [ ] **State Validation:** Update `config.py` to parse the new auxiliary settings and loss weights during pipeline initialization.
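One possible shape for the `app.yaml` additions (key names here are assumptions, not the final schema validated by `config.py`):

```yaml
brain:
  auxiliary_heads:
    thermal_enemy_count: true   # toggle the thermal counting head
loss:
  lambda_aux_thermal: 0.5       # weight for the thermal auxiliary loss
```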
### 2. The Brain (Architecture Redesign)
- [ ] **Secondary Linear Heads:** Modify `DoomLiquidNet` in `brain.py` to conditionally instantiate `nn.Linear` layers branching directly off the flattened cortical vectors (e.g., $T(t)$ or $V(t)$).
- [ ] **Multi-Output Forward Pass:** Update the `forward` method to return a dictionary of auxiliary predictions alongside the primary action logits and the recurrent hidden state.
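A minimal sketch of the multi-output forward pass, using a toy stand-in for `DoomLiquidNet` (layer sizes, head names, and the enemy-count framing are illustrative assumptions):

```python
import torch
import torch.nn as nn

class AuxHeadNet(nn.Module):
    """Toy stand-in for DoomLiquidNet with an optional thermal auxiliary head."""
    def __init__(self, latent_dim=32, n_actions=4, max_enemies=8, use_aux=True):
        super().__init__()
        self.thermal = nn.Linear(64, latent_dim)       # stand-in thermal cortex
        self.motor = nn.Linear(latent_dim, n_actions)  # primary action head
        # Secondary head branches off the cortical latent, before concatenation.
        self.aux_enemy_count = (
            nn.Linear(latent_dim, max_enemies + 1) if use_aux else None
        )

    def forward(self, thermal_in):
        t = torch.relu(self.thermal(thermal_in))       # T(t): cortical latent
        out = {"action_logits": self.motor(t), "aux": {}}
        if self.aux_enemy_count is not None:
            out["aux"]["enemy_count"] = self.aux_enemy_count(t)
        return out

net = AuxHeadNet()
out = net(torch.rand(2, 64))  # batch of 2 thermal feature vectors
```

Because the auxiliary predictions live in a separate `aux` dictionary, inference code can consume `action_logits` alone and never touch the extra heads.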
### 3. The Pipeline (ETL & Training)
- [ ] **Ground Truth Extraction:** Update `record.py` and `DoomStreamingDataset` to extract, store, and stream the necessary ground truth labels for the auxiliary tasks (e.g., parsing the exact number of visible enemies from ViZDoom's underlying `state` variables).
- [ ] **Composite Objective Function:** Modify the optimization loop in `train.py` to compute and sum the isolated losses against the main behavioral cloning target: $\mathcal{L}_{Total} = \mathcal{L}_{Action} + \lambda \mathcal{L}_{Aux\_Thermal} + \dots$
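The composite objective can be sketched as below. The $\lambda$ value is a hypothetical config setting, and the thermal task is reduced here, as an assumption, to a binary enemy-visible BCE target; the roadmap's exact counting target is not specified:

```python
import torch
import torch.nn.functional as F

lambda_thermal = 0.5  # hypothetical weight read from the `loss` config block

# Stand-in batch: 8 timesteps, 4 action classes, binary enemy-visible target.
action_logits = torch.randn(8, 4, requires_grad=True)
aux_logits = torch.randn(8, 1, requires_grad=True)
action_targets = torch.randint(0, 4, (8,))
enemy_visible = torch.randint(0, 2, (8, 1)).float()

loss_action = F.cross_entropy(action_logits, action_targets)  # L_Action
loss_aux = F.binary_cross_entropy_with_logits(                 # L_Aux_Thermal
    aux_logits, enemy_visible)
loss_total = loss_action + lambda_thermal * loss_aux           # L_Total
loss_total.backward()  # one backward pass covers both heads
```

A single `backward()` on the summed loss drives gradients through the shared cortex from both objectives at once.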
!!! danger "Risk Assessment"

    **Training Overhead: Moderate**
    Optimizing a composite loss function requires calculating gradients for both the primary classification head and the auxiliary heads simultaneously. However, the additional parameters (small linear layers) are computationally trivial compared to the deep CNNs. The primary overhead is I/O related: modifying the dataset to extract and stream additional ground truth labels from the engine state.
    **Runtime Overhead: Zero**
    This is a purely structural training enhancement. Because the agent only requires the output of the primary Motor Cortex to play the game, the auxiliary heads can be completely detached and bypassed during live inference, maintaining strict temporal compliance with the $35\text{Hz}$ engine loop.
    **Assessment**: High reward, zero runtime risk. Cleared to implement.