Skip to content

Commit 5bf5035

Browse files
committed
fix(gpt-bot): Cap default at 360M and fix fallback to try smaller models
- 1.7B model (1.4GB) exceeds browser WASM memory limits
- Default now caps at 360M for reliable loading
- Fallback now tries SMALLER models when larger ones fail
- Fixed incorrect 1.7B size (was 980MB, actually 1.4GB)
1 parent d2c9d2e commit 5bf5035

File tree

2 files changed

+15
-25
lines changed

2 files changed

+15
-25
lines changed

demos/chatbot-evolution/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,7 @@ <h4>SmolLM2 Model Family</h4>
562562
<tr><td>Parameters</td><td>135 Million</td><td>360 Million</td><td>1.7 Billion</td></tr>
563563
<tr><td>Layers</td><td>9</td><td>16</td><td>24</td></tr>
564564
<tr><td>Hidden Size</td><td>576</td><td>960</td><td>2048</td></tr>
565-
<tr><td>Download (q4)</td><td>~85 MB</td><td>~210 MB</td><td>~980 MB</td></tr>
565+
<tr><td>Download (q4)</td><td>~85 MB</td><td>~210 MB</td><td>~1.4 GB</td></tr>
566566
<tr><td>Min RAM</td><td>2 GB</td><td>4 GB</td><td>8 GB</td></tr>
567567
<tr><td>Context Length</td><td colspan="3">8,192 tokens</td></tr>
568568
</table>

demos/chatbot-evolution/js/gpt-bot.js

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ export class GPTBot {
4545
displayName: 'SmolLM2 1.7B',
4646
dtype: 'q4',
4747
params: '1.7B',
48-
sizeMB: 980,
49-
minRAM: 8, // Requires 8GB+ RAM
48+
sizeMB: 1410,
49+
minRAM: 8,
5050
year: 2024,
5151
org: 'HuggingFace'
5252
}
@@ -64,20 +64,18 @@ export class GPTBot {
6464
* Uses 50% of available RAM as the threshold
6565
*/
6666
getDefaultModelIndex() {
67-
// navigator.deviceMemory returns RAM in GB (rounded to power of 2, max 8)
68-
// Falls back to 4GB if unsupported (conservative default)
6967
const deviceRAM = navigator.deviceMemory || 4;
7068

71-
// Find the largest model that fits in 50% of available RAM
72-
// Models are ordered smallest to largest, so iterate backwards
73-
for (let i = this.models.length - 1; i >= 0; i--) {
69+
// Cap at 360M (index 1) - 1.7B model exceeds browser WASM memory limits
70+
const maxSafeIndex = 1;
71+
72+
for (let i = Math.min(maxSafeIndex, this.models.length - 1); i >= 0; i--) {
7473
if (deviceRAM >= this.models[i].minRAM) {
7574
console.log(`[GPT] Detected ${deviceRAM}GB RAM, auto-selecting ${this.models[i].displayName}`);
7675
return i;
7776
}
7877
}
7978

80-
// Fallback to smallest model
8179
console.log(`[GPT] Low RAM (${deviceRAM}GB), using smallest model`);
8280
return 0;
8381
}
@@ -177,14 +175,14 @@ export class GPTBot {
177175

178176
console.log(`[GPT] Using device: ${device}`);
179177

180-
const startIndex = this.selectedModelIndex;
181-
for (let i = startIndex; i < this.models.length; i++) {
182-
this.loadAttempt = i - startIndex + 1;
178+
// Try selected model first, then fall back to SMALLER models
179+
for (let i = this.selectedModelIndex; i >= 0; i--) {
180+
this.loadAttempt = this.selectedModelIndex - i + 1;
183181
const model = this.models[i];
184182
this.currentModel = model;
185183

186184
try {
187-
console.log(`[GPT] Attempting to load model ${i + 1}/${this.models.length}: ${model.name}`);
185+
console.log(`[GPT] Attempting to load model: ${model.name}`);
188186
this.reportProgress(`Loading ${model.displayName} (${model.params})`);
189187

190188
this.generator = await pipeline('text-generation', model.name, {
@@ -205,23 +203,14 @@ export class GPTBot {
205203
console.log(`[GPT] Successfully loaded ${model.displayName}`);
206204
this.isReady = true;
207205
this.isLoading = false;
206+
this.selectedModelIndex = i;
208207
this.reportProgress(`${model.displayName} loaded successfully!`, 100);
209208
return true;
210209

211210
} catch (modelError) {
212-
// Extract meaningful error info
213211
const errorMsg = modelError?.message || String(modelError);
214-
const errorName = modelError?.name || 'Unknown';
215-
216212
console.error(`[GPT] Failed to load ${model.displayName}:`, errorMsg);
217-
console.error('[GPT] Error type:', errorName);
218-
console.error('[GPT] Full error object:', modelError);
219-
220-
if (modelError?.stack) {
221-
console.error('[GPT] Stack trace:', modelError.stack);
222-
}
223213

224-
// If WebGPU failed, try WASM for this model
225214
if (device === 'webgpu') {
226215
console.log(`[GPT] Retrying ${model.displayName} with WASM backend...`);
227216
try {
@@ -241,15 +230,16 @@ export class GPTBot {
241230
console.log(`[GPT] Successfully loaded ${model.displayName} with WASM`);
242231
this.isReady = true;
243232
this.isLoading = false;
233+
this.selectedModelIndex = i;
244234
this.reportProgress(`${model.displayName} loaded (WASM)!`, 100);
245235
return true;
246236
} catch (wasmError) {
247237
console.error(`[GPT] WASM fallback also failed:`, wasmError?.message || wasmError);
248238
}
249239
}
250240

251-
if (i < this.models.length - 1) {
252-
this.reportProgress(`${model.displayName} failed, trying next model...`);
241+
if (i > 0) {
242+
this.reportProgress(`${model.displayName} failed, trying smaller model...`);
253243
}
254244
}
255245
}

0 commit comments

Comments (0)