Skip to content

Commit 5bf5035

Browse files
committed
fix(gpt-bot): Cap default at 360M and fix fallback to try smaller models
- 1.7B model (1.4GB) exceeds browser WASM memory limits
- Default now caps at 360M for reliable loading
- Fallback now tries SMALLER models when larger ones fail
- Fixed incorrect 1.7B size (was 980MB, actually 1.4GB)
1 parent d2c9d2e commit 5bf5035

File tree

2 files changed

+15
-25
lines changed

2 files changed

+15
-25
lines changed

demos/chatbot-evolution/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,7 @@ <h4>SmolLM2 Model Family</h4>
562562
<tr><td>Parameters</td><td>135 Million</td><td>360 Million</td><td>1.7 Billion</td></tr>
563563
<tr><td>Layers</td><td>9</td><td>16</td><td>24</td></tr>
564564
<tr><td>Hidden Size</td><td>576</td><td>960</td><td>2048</td></tr>
565-
<tr><td>Download (q4)</td><td>~85 MB</td><td>~210 MB</td><td>~980 MB</td></tr>
565+
<tr><td>Download (q4)</td><td>~85 MB</td><td>~210 MB</td><td>~1.4 GB</td></tr>
566566
<tr><td>Min RAM</td><td>2 GB</td><td>4 GB</td><td>8 GB</td></tr>
567567
<tr><td>Context Length</td><td colspan="3">8,192 tokens</td></tr>
568568
</table>

demos/chatbot-evolution/js/gpt-bot.js

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ export class GPTBot {
4545
displayName: 'SmolLM2 1.7B',
4646
dtype: 'q4',
4747
params: '1.7B',
48-
sizeMB: 980,
49-
minRAM: 8, // Requires 8GB+ RAM
48+
sizeMB: 1410,
49+
minRAM: 8,
5050
year: 2024,
5151
org: 'HuggingFace'
5252
}
@@ -64,20 +64,18 @@ export class GPTBot {
6464
* Uses 50% of available RAM as the threshold
6565
*/
6666
getDefaultModelIndex() {
67-
// navigator.deviceMemory returns RAM in GB (rounded to power of 2, max 8)
68-
// Falls back to 4GB if unsupported (conservative default)
6967
const deviceRAM = navigator.deviceMemory || 4;
7068

71-
// Find the largest model that fits in 50% of available RAM
72-
// Models are ordered smallest to largest, so iterate backwards
73-
for (let i = this.models.length - 1; i >= 0; i--) {
69+
// Cap at 360M (index 1) - 1.7B model exceeds browser WASM memory limits
70+
const maxSafeIndex = 1;
71+
72+
for (let i = Math.min(maxSafeIndex, this.models.length - 1); i >= 0; i--) {
7473
if (deviceRAM >= this.models[i].minRAM) {
7574
console.log(`[GPT] Detected ${deviceRAM}GB RAM, auto-selecting ${this.models[i].displayName}`);
7675
return i;
7776
}
7877
}
7978

80-
// Fallback to smallest model
8179
console.log(`[GPT] Low RAM (${deviceRAM}GB), using smallest model`);
8280
return 0;
8381
}
@@ -177,14 +175,14 @@ export class GPTBot {
177175

178176
console.log(`[GPT] Using device: ${device}`);
179177

180-
const startIndex = this.selectedModelIndex;
181-
for (let i = startIndex; i < this.models.length; i++) {
182-
this.loadAttempt = i - startIndex + 1;
178+
// Try selected model first, then fall back to SMALLER models
179+
for (let i = this.selectedModelIndex; i >= 0; i--) {
180+
this.loadAttempt = this.selectedModelIndex - i + 1;
183181
const model = this.models[i];
184182
this.currentModel = model;
185183

186184
try {
187-
console.log(`[GPT] Attempting to load model ${i + 1}/${this.models.length}: ${model.name}`);
185+
console.log(`[GPT] Attempting to load model: ${model.name}`);
188186
this.reportProgress(`Loading ${model.displayName} (${model.params})`);
189187

190188
this.generator = await pipeline('text-generation', model.name, {
@@ -205,23 +203,14 @@ export class GPTBot {
205203
console.log(`[GPT] Successfully loaded ${model.displayName}`);
206204
this.isReady = true;
207205
this.isLoading = false;
206+
this.selectedModelIndex = i;
208207
this.reportProgress(`${model.displayName} loaded successfully!`, 100);
209208
return true;
210209

211210
} catch (modelError) {
212-
// Extract meaningful error info
213211
const errorMsg = modelError?.message || String(modelError);
214-
const errorName = modelError?.name || 'Unknown';
215-
216212
console.error(`[GPT] Failed to load ${model.displayName}:`, errorMsg);
217-
console.error('[GPT] Error type:', errorName);
218-
console.error('[GPT] Full error object:', modelError);
219-
220-
if (modelError?.stack) {
221-
console.error('[GPT] Stack trace:', modelError.stack);
222-
}
223213

224-
// If WebGPU failed, try WASM for this model
225214
if (device === 'webgpu') {
226215
console.log(`[GPT] Retrying ${model.displayName} with WASM backend...`);
227216
try {
@@ -241,15 +230,16 @@ export class GPTBot {
241230
console.log(`[GPT] Successfully loaded ${model.displayName} with WASM`);
242231
this.isReady = true;
243232
this.isLoading = false;
233+
this.selectedModelIndex = i;
244234
this.reportProgress(`${model.displayName} loaded (WASM)!`, 100);
245235
return true;
246236
} catch (wasmError) {
247237
console.error(`[GPT] WASM fallback also failed:`, wasmError?.message || wasmError);
248238
}
249239
}
250240

251-
if (i < this.models.length - 1) {
252-
this.reportProgress(`${model.displayName} failed, trying next model...`);
241+
if (i > 0) {
242+
this.reportProgress(`${model.displayName} failed, trying smaller model...`);
253243
}
254244
}
255245
}

0 commit comments

Comments (0)