Skip to content

Commit 01a426e

Browse files
author
Grant Moore
committed
training models and whatnot
1 parent 460cd9f commit 01a426e

File tree

9 files changed

+258
-24
lines changed

9 files changed

+258
-24
lines changed

app/pipeline/analyze.py

Lines changed: 203 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -373,4 +373,206 @@ def list_models(cfg: GolemConfig, mode: str = None):
373373

374374
if not found_any:
375375
logger.info("No models found.")
376-
logger.info("======================================================")
376+
logger.info("======================================================")
377+
378+
379+
@register_command("examine")
380+
def examine(cfg: GolemConfig, module_name: str = "all", target_file: str = None, index: int = 0):
381+
r"""
382+
Generates a phenomenological saliency map (Grad-CAM) for a specific sequence.
383+
384+
This evaluates the model's visual and thermal cortices to identify which spatial
385+
pixels triggered the agent's highest-probability action prediction.
386+
387+
Args:
388+
cfg (GolemConfig): Centralized configuration object.
389+
module_name (str, optional): The specific module dataset to pull a sequence from. Default: ``"all"``
390+
target_file (str, optional): The specific model file to load.
391+
index (int, optional): The batch index in the dataset to examine. Default: 0
392+
"""
393+
try:
394+
import matplotlib.pyplot as plt
395+
from captum.attr import LayerGradCam, LayerAttribution
396+
except ImportError:
397+
logger.error("Captum and matplotlib are required. Run: pip install captum matplotlib")
398+
return
399+
400+
device = torch.device('mps') if torch.backends.mps.is_available() else torch.device("cpu")
401+
active_profile = cfg.brain.mode
402+
data_dir = Path(resolve_path(cfg.data.dirs["training"])) / active_profile
403+
prefix_clean = cfg.data.prefix.rstrip('_')
404+
405+
# 1. Load the Dataset
406+
if module_name and module_name.lower() == "all":
407+
file_pattern = f"{prefix_clean}_*.npz"
408+
else:
409+
file_pattern = f"{prefix_clean}_{module_name}*.npz"
410+
411+
dataset = DoomStreamingDataset(
412+
str(data_dir),
413+
seq_len=cfg.training.sequence_length,
414+
file_pattern=file_pattern,
415+
augment=False,
416+
action_names=cfg.training.action_names,
417+
dsp_config=cfg.brain.dsp,
418+
sensors=cfg.brain.sensors
419+
)
420+
421+
if len(dataset) == 0:
422+
logger.error("No data found to examine.")
423+
return
424+
425+
safe_index = min(max(0, index), len(dataset) - 1)
426+
logger.info(f"Loading sequence {safe_index} from dataset...")
427+
inputs, actions = dataset[safe_index]
428+
429+
# 2. Discover Brain Architecture & Load Model
430+
model_dir = Path(resolve_path(cfg.data.dirs["model"])) / active_profile
431+
432+
if target_file:
433+
active_model_path = model_dir / target_file
434+
if not active_model_path.exists():
435+
logger.error(f"Target model file not found: {active_model_path}")
436+
return
437+
archives = [active_model_path]
438+
else:
439+
active_model_path = Path(resolve_path(cfg.data.dirs["training"])) / active_profile / "golem.pth"
440+
archives = list(model_dir.glob("*.pth"))
441+
442+
apply_latest_parameters(cfg, archives)
443+
444+
try:
445+
state_dict = torch.load(str(active_model_path), map_location=device, weights_only=True)
446+
n_actions = cfg.training.action_space_size
447+
if 'output.weight' in state_dict:
448+
n_actions = state_dict['output.weight'].shape[0]
449+
450+
model = DoomLiquidNet(
451+
n_actions=n_actions,
452+
cortical_depth=cfg.brain.cortical_depth,
453+
working_memory=cfg.brain.working_memory,
454+
sensors=cfg.brain.sensors,
455+
dsp_config=cfg.brain.dsp
456+
).to(device)
457+
model.load_state_dict(state_dict)
458+
model.eval()
459+
except FileNotFoundError:
460+
logger.error(f"No brain found at {active_model_path}. Train first!")
461+
return
462+
463+
# 3. Prepare Tensors
464+
# Add batch dimension: (1, Seq_Len, C, H, W)
465+
x_vis = inputs['visual'].unsqueeze(0).to(device)
466+
x_aud = inputs['audio'].unsqueeze(0).to(device) if 'audio' in inputs else None
467+
x_thm = inputs['thermal'].unsqueeze(0).to(device) if 'thermal' in inputs else None
468+
469+
seq_len = x_vis.size(1)
470+
471+
# 4. Separate history (to build ODE state) from the final prediction frame
472+
if seq_len > 1:
473+
x_vis_hist = x_vis[:, :-1, ...]
474+
x_aud_hist = x_aud[:, :-1, ...] if x_aud is not None else None
475+
x_thm_hist = x_thm[:, :-1, ...] if x_thm is not None else None
476+
477+
with torch.no_grad():
478+
_, hx = model(x_vis_hist, x_aud_hist, x_thm_hist)
479+
else:
480+
hx = None
481+
482+
x_vis_step = x_vis[:, -1:, ...]
483+
x_aud_step = x_aud[:, -1:, ...] if x_aud is not None else None
484+
x_thm_step = x_thm[:, -1:, ...] if x_thm is not None else None
485+
486+
# 5. Captum Wrapper
487+
# Captum expects a standard tensor-in, tensor-out pass for its hooks.
488+
class ModelWrapperStep(torch.nn.Module):
489+
def __init__(self, core_model, state):
490+
super().__init__()
491+
self.model = core_model
492+
self.hx = state
493+
def forward(self, xv, xa=None, xt=None):
494+
logits, _ = self.model(xv, xa, xt, self.hx)
495+
return logits[:, 0, :] # Extract the 1 step batch
496+
497+
def get_last_conv(module_seq):
498+
for layer in reversed(module_seq):
499+
if isinstance(layer, torch.nn.Conv2d):
500+
return layer
501+
return None
502+
503+
wrapper = ModelWrapperStep(model, hx)
504+
505+
# Find the most probable action to attribute
506+
with torch.no_grad():
507+
final_logits = wrapper(x_vis_step, x_aud_step, x_thm_step)
508+
probs = torch.sigmoid(final_logits)[0]
509+
510+
target_idx = torch.argmax(probs).item()
511+
512+
# Resolve the name dynamically in case the loaded weights expanded the action space
513+
action_names = list(cfg.training.action_names)
514+
if len(action_names) < n_actions:
515+
action_names += [f"ACTION_{i}" for i in range(len(action_names), n_actions)]
516+
target_name = action_names[target_idx]
517+
518+
logger.info(f"Generating Grad-CAM attributing to highest predicted action: {target_name} ({probs[target_idx]:.2f})")
519+
520+
# 6. Extract Visual Cortex Heatmap
521+
vis_layer = get_last_conv(model.conv)
522+
lgc_vis = LayerGradCam(wrapper, vis_layer)
523+
attr_vis = lgc_vis.attribute(x_vis_step, target=target_idx, additional_forward_args=(x_aud_step, x_thm_step))
524+
attr_vis = LayerAttribution.interpolate(attr_vis, (64, 64))
525+
526+
attr_vis_np = attr_vis.squeeze().cpu().detach().numpy()
527+
attr_vis_np = np.maximum(attr_vis_np, 0)
528+
if np.max(attr_vis_np) > 0:
529+
attr_vis_np /= np.max(attr_vis_np)
530+
531+
# 7. Extract Thermal Cortex Heatmap (If active)
532+
attr_thm_np = None
533+
if model.use_thermal:
534+
thm_layer = get_last_conv(model.thermal_conv)
535+
lgc_thm = LayerGradCam(wrapper, thm_layer)
536+
attr_thm = lgc_thm.attribute(x_vis_step, target=target_idx, additional_forward_args=(x_aud_step, x_thm_step))
537+
attr_thm = LayerAttribution.interpolate(attr_thm, (64, 64))
538+
539+
attr_thm_np = attr_thm.squeeze().cpu().detach().numpy()
540+
attr_thm_np = np.maximum(attr_thm_np, 0)
541+
if np.max(attr_thm_np) > 0:
542+
attr_thm_np /= np.max(attr_thm_np)
543+
544+
# 8. Render Side-by-Side Validation
545+
img_vis_rgb = x_vis_step[0, 0, :3, ...].permute(1, 2, 0).cpu().numpy()
546+
547+
cols = 4 if model.use_thermal else 2
548+
fig, axes = plt.subplots(1, cols, figsize=(4 * cols, 4))
549+
if cols == 2: axes = [axes[0], axes[1]] # Ensure list formatting
550+
551+
# Plot Visual
552+
axes[0].imshow(img_vis_rgb)
553+
axes[0].set_title("Visual Input (RGB)")
554+
axes[0].axis('off')
555+
556+
axes[1].imshow(img_vis_rgb)
557+
axes[1].imshow(attr_vis_np, cmap='jet', alpha=0.5)
558+
axes[1].set_title(f"Visual Grad-CAM\nTarget: {target_name}")
559+
axes[1].axis('off')
560+
561+
# Plot Thermal
562+
if model.use_thermal:
563+
img_thm = x_thm_step[0, 0, 0, ...].cpu().numpy()
564+
axes[2].imshow(img_thm, cmap='gray')
565+
axes[2].set_title("Thermal Input (Mask)")
566+
axes[2].axis('off')
567+
568+
axes[3].imshow(img_thm, cmap='gray')
569+
axes[3].imshow(attr_thm_np, cmap='jet', alpha=0.5)
570+
axes[3].set_title(f"Thermal Grad-CAM\nTarget: {target_name}")
571+
axes[3].axis('off')
572+
573+
out_path = Path("examine_output.png")
574+
plt.tight_layout()
575+
plt.savefig(out_path, dpi=150, bbox_inches='tight')
576+
plt.close(fig)
577+
578+
logger.info(f"Saliency map saved successfully to: {out_path.absolute()}")

app/pipeline/train.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def train(cfg: GolemConfig, module_name: str = None, include_recovery: bool = Fa
8585
logger.error(f"No training data found matching pattern: {file_pattern} in {data_dirs}")
8686
return
8787

88-
# ---> NEW: Mount the Stratified Sampler
88+
# Mount the Stratified Sampler
8989
sampler = StatefulStratifiedBatchSampler(
9090
base_episodes=dataset.base_episodes,
9191
recovery_episodes=dataset.recovery_episodes,

conf/app.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ brain:
2020
visual: true
2121
depth: false
2222
audio: false
23-
thermal: false
24-
activation: 0.5
23+
thermal: true
24+
activation: 0.4
2525
cortical_depth: 2
2626
working_memory: 128
2727
dsp:
Binary file not shown.

docs/board/closed/issues.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,15 @@ Implement Stateful BPTT in `train.py`:
5959
2. Retain the hidden state output `hx` from the previous batch.
6060
3. Detach the state from the computational graph (`hx = hx.detach()`) to prevent backpropagating into infinite history.
6161
4. Pass the detached state as the prior for the subsequent batch.
62+
63+
## Issue: Phenomenological Saliency Mapping (Grad-CAM)
64+
65+
**Status:** Open | **Priority:** High | **Opened**: 2026/02/21
66+
67+
**Description:**
68+
69+
We currently lack explainable AI (XAI) tooling to verify that the agent's distinct sensor cortices (Visual, Depth, Thermal) are specializing as intended. We need visual proof that the Visual Cortex focuses on static geometry while the Thermal Cortex tracks dynamic threats.
70+
71+
**Proposed Solution:**
72+
73+
Integrate the `captum` library to generate Gradient-weighted Class Activation Mapping (Grad-CAM) heatmaps. Create an `examine` command that takes a single sequence from the dataset, runs `captum.attr.LayerGradCam` on the final convolutional layers of the respective cortices, and saves the upsampled heatmaps as side-by-side `.png` files. This will allow us to physically view the spatial stimuli responsible for triggering specific action logits.

docs/board/issues.md

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ The `DoomStreamingDataset` currently applies dynamic NumPy transposition, mirror
1414

1515
Refactor the `DataLoader` initialization in `train.py` to offload ETL transformations to background processes. Implement `num_workers` (e.g., 4), enable `pin_memory=True` for faster Host-to-Device memory transfers, and establish a `prefetch_factor`.
1616

17-
## Issue 3: Memory Overflow Risk in Dataset Loading (RAM Bottleneck)
17+
## Issue 2: Memory Overflow Risk in Dataset Loading (RAM Bottleneck)
1818

1919
**Status:** Open | **Priority:** Medium | **Opened**: 2026/02/21
2020

@@ -26,19 +26,7 @@ In `dataset.py`, `DoomStreamingDataset` currently iterates through all `.npz` fi
2626

2727
Migrate the storage backend from compressed `.npz` archives to HDF5 (`h5py`) format, or utilize NumPy's `mmap_mode='r'` to memory-map the data on disk. This allows the `Dataset` to lazily stream tensor blocks directly from the NVMe/SSD without pre-loading the entire corpus into volatile memory.
2828

29-
## Issue 4: Phenomenological Saliency Mapping (Grad-CAM)
30-
31-
**Status:** Open | **Priority:** High | **Opened**: 2026/02/21
32-
33-
**Description:**
34-
35-
We currently lack explainable AI (XAI) tooling to verify that the agent's distinct sensor cortices (Visual, Depth, Thermal) are specializing as intended. We need visual proof that the Visual Cortex focuses on static geometry while the Thermal Cortex tracks dynamic threats.
36-
37-
**Proposed Solution:**
38-
39-
Integrate the `captum` library to generate Gradient-weighted Class Activation Mapping (Grad-CAM) heatmaps. Create an `examine` command that takes a single sequence from the dataset, runs `captum.attr.LayerGradCam` on the final convolutional layers of the respective cortices, and saves the upsampled heatmaps as side-by-side `.png` files. This will allow us to physically view the spatial stimuli responsible for triggering specific action logits.
40-
41-
## Issue 5: Audit Validation Leak & Redundancy (Train/Test Split)
29+
## Issue 3: Audit Validation Leak & Redundancy (Train/Test Split)
4230

4331
**Status:** Open | **Priority:** Medium | **Opened**: 2026/02/21
4432

docs/board/roadmap.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,36 @@ See [Phase Archive](./closed/phases.md) for the project's completed phases.
138138

139139
**Assessment**: Acceptable, if gated behind a configuration property that is disabled by default. Cleared to implement.
140140

141+
## Phase 10: Cortical Auxiliary Heads (Isolated Representation Learning)
142+
143+
*Goal:* Attach secondary linear heads directly to the latent output vectors of specific cortices (e.g., Thermal, Visual) prior to sensorimotor concatenation. This enables the application of targeted, isolated loss functions (e.g., BCE for enemy counting on the thermal mask) directly to the sub-networks, accelerating feature extraction without waiting for the slow, end-to-end action gradient.
144+
145+
### 1. Configuration Layer
146+
147+
* [ ] **Auxiliary Toggles:** Update `app.yaml` to include an `auxiliary_heads` configuration block under `brain` (e.g., toggling thermal enemy counting) and corresponding $\lambda$ weights under the `loss` block.
148+
* [ ] **State Validation:** Update `config.py` to parse the new auxiliary settings and loss weights during pipeline initialization.
149+
150+
### 2. The Brain (Architecture Redesign)
151+
152+
* [ ] **Secondary Linear Heads:** Modify `DoomLiquidNet` in `brain.py` to conditionally instantiate `nn.Linear` layers branching directly off the flattened cortical vectors (e.g., $T(t)$ or $V(t)$).
153+
* [ ] **Multi-Output Forward Pass:** Update the `forward` method to return a dictionary of auxiliary predictions alongside the primary action logits and the recurrent hidden state.
154+
155+
### 3. The Pipeline (ETL & Training)
156+
157+
* [ ] **Ground Truth Extraction:** Update `record.py` and `DoomStreamingDataset` to extract, store, and stream the necessary ground truth labels for the auxiliary tasks (e.g., parsing the exact number of visible enemies from ViZDoom's underlying `state` variables).
158+
* [ ] **Composite Objective Function:** Modify the optimization loop in `train.py` to compute and sum the isolated losses against the main behavioral cloning target: $\mathcal{L}_{Total} = \mathcal{L}_{Action} + \lambda \mathcal{L}_{Aux\_Thermal} + \dots$
159+
160+
!!! danger "Risk Assessment"
161+
**Training Overhead: Moderate**
162+
163+
Optimizing a composite loss function requires calculating gradients for both the primary classification head and the auxiliary heads simultaneously. However, the additional parameters (small linear layers) are mathematically trivial compared to the deep CNNs. The primary overhead is I/O related: modifying the dataset to extract and stream additional ground truth labels from the engine state.
164+
165+
**Runtime Overhead: Zero**
166+
167+
This is a purely structural training enhancement. Because the agent only requires the output of the primary Motor Cortex to play the game, the auxiliary heads can be completely detached and bypassed during live inference, maintaining strict temporal compliance with the $35\text{Hz}$ engine loop.
168+
169+
**Assessment**: High reward, zero runtime risk. Cleared to implement.
170+
141171
---
142172

143173
## Distant Future

docs/model/brain.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,22 +14,22 @@ flowchart TD
1414
subgraph VisCortex [Visual Cortex]
1515
Concat_Vis{"Concat Channels"}
1616
VisIn["Input (4x64x64)"]
17-
VisCNN["D Conv2d Layers + ReLU\n(stride=2, padding=1)"]
17+
VisCNN["D Conv2d Layers + ReLU(stride=2, padding=1)"]
1818
VisFlat["Flatten"]
1919
V_t["Latent Vector V(t)"]
2020
end
2121
2222
subgraph ThmCortex [Thermal Cortex]
2323
ThmIn["Input (1x64x64)"]
24-
ThmCNN["D Conv2d Layers + ReLU\n(stride=2, padding=1)"]
24+
ThmCNN["D Conv2d Layers + ReLU(stride=2, padding=1)"]
2525
ThmFlat["Flatten"]
2626
T_t["Latent Vector T(t)"]
2727
end
2828
2929
subgraph AudCortex [Auditory Cortex]
3030
DSP["DSP: MelSpectrogram\n& AmplitudeToDB"]
3131
Mel["2D Spectrogram\n(2 x H_mels x W_time)"]
32-
AudCNN["3 Conv2d Layers + ReLU\n(stride=2, padding=1)"]
32+
AudCNN["3 Conv2d Layers + ReLU(stride=2, padding=1)"]
3333
AudPool["AdaptiveAvgPool2d(1, 1)"]
3434
AudFlat["Flatten"]
3535
A_t["Latent Vector A(t)"]
@@ -38,7 +38,7 @@ flowchart TD
3838
subgraph Core [Liquid Core & Motor Head]
3939
Fusion{"Concatenate ⊕"}
4040
I_t["Multi-Modal Input I(t)"]
41-
CfC["Closed-form Continuous (CfC) Cell\nunits = working_memory"]
41+
CfC["Closed-form Continuous (CfC) Cell = working_memory"]
4242
hx_in[/"Previous State x(t-1)"/]
4343
hx_out[/"Next State x(t)"/]
4444
Linear["Linear Layer (n_actions)"]

requirements.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
# Application
2-
opencv-python
3-
scikit-learn
2+
captum
43
Jinja2
4+
matplotlib
55
ncps
6+
opencv-python
67
pydantic
78
pynput
89
PyYaml
10+
scikit-learn
911
torch
1012
torchaudio
1113
torchvision

0 commit comments

Comments
 (0)