Merged
57 changes: 57 additions & 0 deletions CLAUDE.md
@@ -1003,6 +1003,63 @@ commands/example/

**Never import server/browser code IN shared files!**

### Rust-Backed Commands (IPC Mixin Pattern)

When a command is backed by Rust (via continuum-core IPC), it requires **THREE layers**:

```
1. CommandSpec JSON → generator/specs/gpu-stats.json
2. CommandGenerator → npx tsx generator/CommandGenerator.ts generator/specs/gpu-stats.json
3. IPC Mixin → workers/continuum-core/bindings/modules/gpu.ts
```

**Step-by-step workflow:**

```bash
# 1. Create the Rust module (ServiceModule trait) with IPC commands
# e.g., modules/gpu.rs handles "gpu/stats", "gpu/pressure"

# 2. Create a CommandSpec JSON
cat > generator/specs/gpu-stats.json << 'EOF'
{
"name": "gpu/stats",
"description": "Query GPU memory stats",
"params": [...],
"results": [...],
"examples": [...],
"accessLevel": "ai-safe"
}
EOF

# 3. Run the generator (creates shared/Types, server/Command, browser/Command, README, tests)
npx tsx generator/CommandGenerator.ts generator/specs/gpu-stats.json

# 4. Create IPC mixin (snake_case Rust → camelCase TypeScript)
# workers/continuum-core/bindings/modules/gpu.ts
# Pattern: export function GpuMixin<T>(Base: T) { return class extends Base { ... } }

# 5. Add mixin to RustCoreIPC.ts composition chain
# import { GpuMixin } from './modules/gpu';
# const ComposedClient = ... GpuMixin(RuntimeMixin( ... )) ...

# 6. Implement server command to use mixin
# const stats = await this.rustClient.gpuStats();

# 7. Build and verify
npm run build:ts && npm start
./jtag gpu/stats
```

**The three-layer architecture:**

| Layer | File | Purpose |
|-------|------|---------|
| Rust IPC | `modules/gpu.rs` | ServiceModule, handles `gpu/stats` |
| TS Mixin | `bindings/modules/gpu.ts` | snake_case→camelCase, typed wrapper |
| TS Command | `commands/gpu/stats/` | Generated scaffold, uses mixin |

**Without the mixin + command layer**, Rust IPC commands exist but are invisible to `./jtag` and the command system. The generator creates discoverability (README, help text, CLI params).
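The mixin layer (step 4) and the composition chain (step 5) can be sketched as follows. This is a minimal illustration, not the actual bindings: the `Constructor`/`IPCBase` helper types, the `call()` method, and the snake_case field names are all assumptions.

```typescript
// Hypothetical sketch of an IPC mixin (bindings/modules/gpu.ts style).
// Base-client shape and Rust field names are assumed for illustration.
type Constructor<T = object> = new (...args: any[]) => T;

interface IPCBase {
  call(command: string, params?: Record<string, unknown>): Promise<any>;
}

export function GpuMixin<T extends Constructor<IPCBase>>(Base: T) {
  return class extends Base {
    // Wraps the Rust "gpu/stats" IPC command; maps snake_case → camelCase.
    async gpuStats(): Promise<{ usedBytes: number; totalBytes: number }> {
      const raw = await this.call('gpu/stats');
      return { usedBytes: raw.used_bytes, totalBytes: raw.total_bytes };
    }

    // Wraps "gpu/pressure"; returns memory pressure in [0, 1].
    async gpuPressure(): Promise<number> {
      return (await this.call('gpu/pressure')).pressure;
    }
  };
}
```

Step 5 then layers the mixin onto the composed client, e.g. `const ComposedClient = GpuMixin(RuntimeMixin(BaseClient))`, which is what makes `this.rustClient.gpuStats()` available to the generated server command in step 6.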

---

## 📸 WIDGET DOM PATH
8 changes: 7 additions & 1 deletion src/browser/generated.ts
@@ -1,7 +1,7 @@
/**
* Browser Structure Registry - Auto-generated
*
* Contains 11 daemons and 221 commands and 2 adapters and 28 widgets.
* Contains 11 daemons and 222 commands and 2 adapters and 28 widgets.
* Generated by scripts/generate-structure.ts - DO NOT EDIT MANUALLY
*/

@@ -135,6 +135,7 @@ import { GenomeJobStatusBrowserCommand } from './../commands/genome/job-status/b
import { GenomeTrainBrowserCommand } from './../commands/genome/train/browser/GenomeTrainBrowserCommand';
import { GenomeTrainingExportBrowserCommand } from './../commands/genome/training-export/browser/GenomeTrainingExportBrowserCommand';
import { GenomeTrainingPipelineBrowserCommand } from './../commands/genome/training-pipeline/browser/GenomeTrainingPipelineBrowserCommand';
import { GpuStatsBrowserCommand } from './../commands/gpu/stats/browser/GpuStatsBrowserCommand';
import { HelpBrowserCommand } from './../commands/help/browser/HelpBrowserCommand';
import { IndicatorBrowserCommand } from './../commands/indicator/browser/IndicatorBrowserCommand';
import { InferenceGenerateBrowserCommand } from './../commands/inference/generate/browser/InferenceGenerateBrowserCommand';
@@ -923,6 +924,11 @@ export const BROWSER_COMMANDS: CommandEntry[] = [
className: 'GenomeTrainingPipelineBrowserCommand',
commandClass: GenomeTrainingPipelineBrowserCommand
},
{
name: 'gpu/stats',
className: 'GpuStatsBrowserCommand',
commandClass: GpuStatsBrowserCommand
},
{
name: 'help',
className: 'HelpBrowserCommand',
@@ -141,6 +141,9 @@ export class GenomeAcademySessionServerCommand extends CommandBase<GenomeAcademy
// 3. Submit teacher sentinel
// PipelineStep[] (Rust bindings) → SentinelStep[] (TS definitions) — structurally compatible wire types
const teacherSteps = teacherPipeline.steps as unknown as SentinelStep[];
// Academy sessions run multiple topics (curriculum → synthesize → train → exam per topic).
// Each topic takes 3-7 minutes, so a 30-minute timeout covers sessions of up to ~6 topics.
const pipelineTimeout = 1800;
const modePrefixMap = { knowledge: '', coding: 'coding-', project: 'project-' } as const;
const modePrefix = modePrefixMap[mode];
const modeLabel = mode === 'project' ? 'Project' : mode === 'coding' ? 'Coding' : 'Knowledge';
@@ -160,6 +163,7 @@
},
parentPersonaId: personaId,
sentinelName: teacherName,
timeout: pipelineTimeout,
});

const teacherHandle = teacherResult.handle ?? '';
@@ -181,6 +185,7 @@
},
parentPersonaId: personaId,
sentinelName: studentName,
timeout: pipelineTimeout,
});

const studentHandle = studentResult.handle ?? '';
@@ -57,24 +57,56 @@ export class GenomeDatasetSynthesizeServerCommand extends CommandBase<GenomeData
temperature: 0.8,
};

const generateResult = await Commands.execute<AIGenerateParams, AIGenerateResult>(
'ai/generate',
generateParams,
);
// Retry with exponential backoff for transient API errors (DeepSeek "error decoding response body", etc.)
const MAX_RETRIES = 3;
const RETRY_BASE_MS = 2000;
let generateResult: AIGenerateResult | undefined;
let lastError: string | undefined;

for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
const result = await Commands.execute<AIGenerateParams, AIGenerateResult>(
'ai/generate',
generateParams,
);

if (result.success && result.text) {
generateResult = result;
break;
}

lastError = result.error ?? 'LLM generation failed — no text returned';

if (attempt < MAX_RETRIES && this._isTransientError(lastError)) {
const delayMs = RETRY_BASE_MS * Math.pow(2, attempt - 1);
console.warn(`⚠️ DATASET SYNTHESIZE: transient error on attempt ${attempt}/${MAX_RETRIES}, retrying in ${delayMs}ms: ${lastError}`);
await new Promise(resolve => setTimeout(resolve, delayMs));
continue;
}

      // Non-transient error or exhausted retries
      return createGenomeDatasetSynthesizeResultFromParams(params, {
        success: false,
        error: lastError,
        datasetPath: '',
        exampleCount: 0,
        topic,
        generatedBy: result.model ?? 'unknown',
      });
    }

if (!generateResult) {
return createGenomeDatasetSynthesizeResultFromParams(params, {
success: false,
error: lastError ?? 'LLM generation failed after retries',
datasetPath: '',
exampleCount: 0,
topic,
generatedBy: 'unknown',
});
}

// Parse the LLM response into JSONL training examples
const jsonlLines = this._parseToJSONL(generateResult.text!, personaName);

if (jsonlLines.length === 0) {
return createGenomeDatasetSynthesizeResultFromParams(params, {
@@ -172,6 +204,22 @@
return lines.join('\n');
}

/**
* Detect transient API errors that are worth retrying.
*/
private _isTransientError(error: string): boolean {
const lower = error.toLowerCase();
return lower.includes('error decoding response body')
|| lower.includes('connection reset')
|| lower.includes('timeout')
|| lower.includes('502 bad gateway')
|| lower.includes('503 service')
|| lower.includes('429 too many')
|| lower.includes('rate limit')
|| lower.includes('econnreset')
|| lower.includes('socket hang up');
}

/**
* Parse LLM output into JSONL training format compatible with genome/train.
*
20 changes: 20 additions & 0 deletions src/commands/genome/train/server/GenomeTrainServerCommand.ts
@@ -97,6 +97,26 @@ export class GenomeTrainServerCommand extends CommandBase<GenomeTrainParams, Gen

this.log.info(`Loaded ${dataset.examples.length} examples`);

// 4. Check GPU pressure — refuse training if system is under memory pressure.
// On Apple Silicon, VRAM IS system RAM. Training a 3B model with optimizer states
// can consume 4-8GB. If inference/TTS/rendering are already using memory, training
// will OOM-kill the process.
try {
const rustClient = RustCoreIPCClient.getInstance();
const pressure = await rustClient.gpuPressure();
this.log.info(`GPU pressure: ${(pressure * 100).toFixed(1)}%`);
if (pressure > 0.6) {
return createGenomeTrainResultFromParams(params, {
success: false,
error: `GPU pressure too high (${(pressure * 100).toFixed(0)}%). Training deferred — would risk OOM. Free memory by unloading models or wait for inference to finish.`,
adapterPath: '',
metrics: { finalLoss: 0, trainingTime: 0, examplesProcessed: 0, epochs: 0 },
});
}
} catch (e) {
this.log.warn(`GPU pressure check failed (${e}), proceeding with training`);
}

// ── ASYNC MODE: fire-and-forget, return handle immediately ──────────────
if (asyncMode) {
return this._executeAsync(params, adapter, dataset, personaId, personaName, traitType, baseModel);
20 changes: 20 additions & 0 deletions src/commands/gpu/stats/.npmignore
@@ -0,0 +1,20 @@
# Development files
.eslintrc*
tsconfig*.json
vitest.config.ts

# Build artifacts
*.js.map
*.d.ts.map

# IDE
.vscode/
.idea/

# Logs
*.log
npm-debug.log*

# OS files
.DS_Store
Thumbs.db