Skip to content

Commit ee0c8cd

Browse files
committed
feat(gpt-bot): Add WASM memory probing for dynamic model selection
- Add probeWasmMemory() to detect browser WASM heap limits via binary search
- Add checkWebGPU() to detect GPU availability for larger models
- Update model selection to use probed limits instead of hardcoded cap
- 1.7B model now selectable when WebGPU available (bypasses WASM heap)
- Update tests to match SmolLM2 configuration
1 parent 5bf5035 commit ee0c8cd

File tree

3 files changed

+168
-37
lines changed

3 files changed

+168
-37
lines changed

demos/chatbot-evolution/js/gpt-bot.js

Lines changed: 115 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,90 @@
44
* Model hierarchy (SmolLM2 family - HuggingFace's browser-optimized models):
55
* 1. SmolLM2-135M-Instruct - Ultra-light, works on any device
66
* 2. SmolLM2-360M-Instruct - Balanced quality/speed (default for 4GB RAM)
7-
* 3. SmolLM2-1.7B-Instruct - Best quality (requires 8GB+ RAM)
7+
* 3. SmolLM2-1.7B-Instruct - Best quality (requires 8GB+ RAM + WebGPU)
88
*
9-
* Auto-selects based on device RAM (navigator.deviceMemory)
9+
* Auto-selects based on device RAM and WASM memory limits.
10+
* When WebGPU is available, larger models become feasible since weights
11+
* go to GPU memory, bypassing WASM heap limits.
1012
*/
1113

1214
export class GPTBot {
15+
// Cached capability detection (computed once)
16+
static _wasmMaxMB = null;
17+
static _webGPUAvailable = null;
18+
19+
/**
20+
* Probe maximum WASM memory available in this browser.
21+
* Uses binary search to find the largest allocatable memory.
22+
* @returns {number} Maximum WASM memory in MB
23+
*/
24+
static probeWasmMemory() {
25+
if (GPTBot._wasmMaxMB !== null) {
26+
return GPTBot._wasmMaxMB;
27+
}
28+
29+
// Binary search for max allocatable WASM pages
30+
// 1 page = 64 KiB, max theoretical = 65536 pages (4GB)
31+
let min = 1;
32+
let max = 65536; // 4GB theoretical max
33+
let best = min;
34+
35+
while (min <= max) {
36+
const mid = Math.floor((min + max) / 2);
37+
try {
38+
// Try to create Memory with this maximum
39+
new WebAssembly.Memory({ initial: 1, maximum: mid });
40+
best = mid;
41+
min = mid + 1;
42+
} catch (e) {
43+
max = mid - 1;
44+
}
45+
}
46+
47+
// Convert pages to MB (1 page = 64 KiB = 0.0625 MB)
48+
GPTBot._wasmMaxMB = Math.floor((best * 64) / 1024);
49+
console.log(`[GPT] Probed WASM memory limit: ${GPTBot._wasmMaxMB}MB (${best} pages)`);
50+
return GPTBot._wasmMaxMB;
51+
}
52+
53+
/**
54+
* Check if WebGPU is available and functional.
55+
* WebGPU allows larger models since weights go to GPU memory.
56+
* @returns {Promise<boolean>}
57+
*/
58+
static async checkWebGPU() {
59+
if (GPTBot._webGPUAvailable !== null) {
60+
return GPTBot._webGPUAvailable;
61+
}
62+
63+
try {
64+
if (!navigator.gpu) {
65+
GPTBot._webGPUAvailable = false;
66+
console.log('[GPT] WebGPU not supported in this browser');
67+
return false;
68+
}
69+
70+
const adapter = await navigator.gpu.requestAdapter();
71+
if (!adapter) {
72+
GPTBot._webGPUAvailable = false;
73+
console.log('[GPT] WebGPU adapter not available');
74+
return false;
75+
}
76+
77+
const limits = adapter.limits;
78+
const maxBufferSize = limits.maxBufferSize || 0;
79+
const maxStorageBufferSize = limits.maxStorageBufferBindingSize || 0;
80+
81+
console.log(`[GPT] WebGPU available - maxBufferSize: ${Math.floor(maxBufferSize / 1024 / 1024)}MB, maxStorageBuffer: ${Math.floor(maxStorageBufferSize / 1024 / 1024)}MB`);
82+
GPTBot._webGPUAvailable = true;
83+
return true;
84+
} catch (e) {
85+
console.log('[GPT] WebGPU check failed:', e.message);
86+
GPTBot._webGPUAvailable = false;
87+
return false;
88+
}
89+
}
90+
1391
constructor() {
1492
this.generator = null;
1593
this.isReady = false;
@@ -19,14 +97,16 @@ export class GPTBot {
1997
this.loadAttempt = 0;
2098

2199
// SmolLM2 family - all have native Transformers.js support (ONNX bundled)
100+
// wasmMinMB: minimum WASM heap needed (weights + runtime overhead)
22101
this.models = [
23102
{
24103
name: 'HuggingFaceTB/SmolLM2-135M-Instruct',
25104
displayName: 'SmolLM2 135M',
26105
dtype: 'q4',
27106
params: '135M',
28107
sizeMB: 85,
29-
minRAM: 2, // Works on 2GB+ devices
108+
wasmMinMB: 300, // 85MB weights + ~200MB runtime
109+
minRAM: 2,
30110
year: 2024,
31111
org: 'HuggingFace'
32112
},
@@ -36,7 +116,8 @@ export class GPTBot {
36116
dtype: 'q4',
37117
params: '360M',
38118
sizeMB: 210,
39-
minRAM: 4, // Recommended for 4GB+ devices
119+
wasmMinMB: 600, // 210MB weights + ~400MB runtime
120+
minRAM: 4,
40121
year: 2024,
41122
org: 'HuggingFace'
42123
},
@@ -46,6 +127,7 @@ export class GPTBot {
46127
dtype: 'q4',
47128
params: '1.7B',
48129
sizeMB: 1410,
130+
wasmMinMB: 2500, // 1410MB weights + ~1GB runtime - exceeds most WASM limits
49131
minRAM: 8,
50132
year: 2024,
51133
org: 'HuggingFace'
@@ -60,23 +142,43 @@ export class GPTBot {
60142
}
61143

62144
/**
63-
* Detect device RAM and select the largest model that fits
64-
* Uses 50% of available RAM as the threshold
145+
* Detect device capabilities and select the best model.
146+
*
147+
* Selection logic:
148+
* 1. Probe WASM memory limit
149+
* 2. Check WebGPU availability (allows larger models)
150+
* 3. Consider device RAM
151+
* 4. Select largest model that fits all constraints
65152
*/
66153
getDefaultModelIndex() {
67154
const deviceRAM = navigator.deviceMemory || 4;
155+
const wasmMaxMB = GPTBot.probeWasmMemory();
156+
157+
// WebGPU check is async, so we optimistically check the cached value
158+
// If WebGPU hasn't been checked yet, assume WASM-only for initial selection
159+
const hasWebGPU = GPTBot._webGPUAvailable === true;
68160

69-
// Cap at 360M (index 1) - 1.7B model exceeds browser WASM memory limits
70-
const maxSafeIndex = 1;
161+
console.log(`[GPT] Capability detection: RAM=${deviceRAM}GB, WASM=${wasmMaxMB}MB, WebGPU=${hasWebGPU}`);
71162

72-
for (let i = Math.min(maxSafeIndex, this.models.length - 1); i >= 0; i--) {
73-
if (deviceRAM >= this.models[i].minRAM) {
74-
console.log(`[GPT] Detected ${deviceRAM}GB RAM, auto-selecting ${this.models[i].displayName}`);
75-
return i;
163+
for (let i = this.models.length - 1; i >= 0; i--) {
164+
const model = this.models[i];
165+
166+
if (deviceRAM < model.minRAM) {
167+
console.log(`[GPT] ${model.displayName}: skipped (needs ${model.minRAM}GB RAM, have ${deviceRAM}GB)`);
168+
continue;
169+
}
170+
171+
// WebGPU bypasses WASM heap limits by loading weights to GPU memory
172+
if (!hasWebGPU && wasmMaxMB < model.wasmMinMB) {
173+
console.log(`[GPT] ${model.displayName}: skipped (needs ${model.wasmMinMB}MB WASM, have ${wasmMaxMB}MB)`);
174+
continue;
76175
}
176+
177+
console.log(`[GPT] Auto-selecting ${model.displayName} (RAM: ${deviceRAM}GB, WASM: ${wasmMaxMB}MB, WebGPU: ${hasWebGPU})`);
178+
return i;
77179
}
78180

79-
console.log(`[GPT] Low RAM (${deviceRAM}GB), using smallest model`);
181+
console.log(`[GPT] Falling back to smallest model (limited resources)`);
80182
return 0;
81183
}
82184

notes/2026-01-04-session-handoff.md

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,39 @@
11
# Session Handoff - January 4, 2026
22

33
**Status:** ✅ All tasks completed
4-
**Completed:** 2026-01-04
4+
**Last Updated:** 2026-01-04 08:15 AM ET
55

6-
## Summary
7-
All three tasks from this handoff were completed:
6+
## Current Session Summary
7+
Implemented WASM memory probing for SmolLM2 model auto-selection in GPT bot.
8+
9+
### Changes Made
10+
1. **Added `GPTBot.probeWasmMemory()`** - Binary search to find max allocatable WASM pages
11+
2. **Added `GPTBot.checkWebGPU()`** - Async check for WebGPU availability with adapter limits
12+
3. **Updated `getDefaultModelIndex()`** - Now uses probed WASM limits + WebGPU detection
13+
4. **Added `wasmMinMB` to model configs** - Minimum WASM heap needed (weights + runtime)
14+
5. **Updated tests** - `test-gpt-bot-loading.mjs` now tests SmolLM2 configuration
15+
16+
### Key Logic
17+
- WASM binary search: tries `WebAssembly.Memory({ initial: 1, maximum: N })` to find max N
18+
- 1 page = 64 KiB, so max pages × 64 / 1024 = max MB
19+
- Model selection: largest model that fits both RAM AND WASM constraints
20+
- WebGPU bypass: when WebGPU available, weights go to GPU memory, relaxing WASM limit
21+
22+
### Files Modified
23+
- `demos/chatbot-evolution/js/gpt-bot.js` - WASM probing, WebGPU detection, model selection
24+
- `tests/test-gpt-bot-loading.mjs` - Updated for SmolLM2 models
25+
26+
### Tests Status
27+
- ✅ All GPT bot tests pass (32/32)
28+
- ✅ All chatbot tests pass (ELIZA, PARRY, ALICE)
29+
30+
### Not Yet Pushed
31+
Run `git status` to see pending changes, then `git push` when ready.
32+
33+
---
34+
35+
## Previous Session Summary (Morning)
36+
All three tasks from previous handoff were completed:
837
1. ✅ Syllabus table fixes (columns, sequential lecture numbering, slide links)
938
2. ✅ Tag filtering removed from demos page
1039
3. ✅ Related Lectures links added to all 15 demos

tests/test-gpt-bot-loading.mjs

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -54,18 +54,18 @@ async function testGPTBotStructure() {
5454
);
5555

5656
runner.assert(
57-
botCode.includes('DeepSeek-R1-Distill-Qwen-1.5B'),
58-
'DeepSeek-R1 is configured as primary model'
57+
botCode.includes('SmolLM2-135M-Instruct'),
58+
'SmolLM2 135M is configured as smallest model'
5959
);
6060

6161
runner.assert(
62-
botCode.includes('gemma-3-1b-it'),
63-
'Gemma 3 1B is configured as first fallback'
62+
botCode.includes('SmolLM2-360M-Instruct'),
63+
'SmolLM2 360M is configured as medium model'
6464
);
6565

6666
runner.assert(
67-
botCode.includes('gemma-3-270m-it'),
68-
'Gemma 3 270M is configured as second fallback'
67+
botCode.includes('SmolLM2-1.7B-Instruct'),
68+
'SmolLM2 1.7B is configured as largest model'
6969
);
7070

7171
runner.assert(
@@ -104,8 +104,8 @@ async function testGPTBotStructure() {
104104
);
105105

106106
runner.assert(
107-
botCode.includes('for (let i = 0; i < this.models.length; i++)'),
108-
'Iterates through model fallback chain'
107+
botCode.includes('for (let i = this.selectedModelIndex; i >= 0; i--)'),
108+
'Iterates through model fallback chain (largest to smallest)'
109109
);
110110

111111
runner.assert(
@@ -139,8 +139,8 @@ async function testModelConfigurations() {
139139
runner.assertEqual(modelCount, 3, 'Has exactly 3 models configured');
140140

141141
runner.assert(
142-
modelsSection.includes('onnx-community/'),
143-
'Uses onnx-community models (Transformers.js compatible)'
142+
modelsSection.includes('HuggingFaceTB/'),
143+
'Uses HuggingFaceTB models (Transformers.js compatible)'
144144
);
145145

146146
runner.assert(
@@ -154,8 +154,8 @@ async function testModelConfigurations() {
154154
);
155155

156156
runner.assert(
157-
modelsSection.includes('year: 2025'),
158-
'Models are from 2025 (modern)'
157+
modelsSection.includes('year: 2024'),
158+
'Models are from 2024 (SmolLM2 release)'
159159
);
160160
}
161161

@@ -171,18 +171,18 @@ async function testArchitectureInfo() {
171171
const botCode = readFileSync(botPath, 'utf-8');
172172

173173
runner.assert(
174-
botCode.includes('DeepSeek-R1-Distill-Qwen-1.5B'),
175-
'Has DeepSeek architecture info'
174+
botCode.includes('SmolLM2-135M'),
175+
'Has SmolLM2-135M architecture info'
176176
);
177177

178178
runner.assert(
179-
botCode.includes('Gemma 3 1B IT'),
180-
'Has Gemma 3 1B architecture info'
179+
botCode.includes('SmolLM2-360M'),
180+
'Has SmolLM2-360M architecture info'
181181
);
182182

183183
runner.assert(
184-
botCode.includes('Gemma 3 270M IT'),
185-
'Has Gemma 3 270M architecture info'
184+
botCode.includes('SmolLM2-1.7B'),
185+
'Has SmolLM2-1.7B architecture info'
186186
);
187187

188188
runner.assert(
@@ -191,8 +191,8 @@ async function testArchitectureInfo() {
191191
);
192192

193193
runner.assert(
194-
botCode.includes('Distilled from DeepSeek-R1 reasoning model'),
195-
'Documents DeepSeek distillation origin'
194+
botCode.includes('Optimized for browser/edge deployment'),
195+
'Documents browser optimization'
196196
);
197197

198198
runner.assert(
@@ -212,7 +212,7 @@ async function testResponseHandling() {
212212
const botCode = readFileSync(botPath, 'utf-8');
213213

214214
runner.assert(
215-
botCode.includes('max_new_tokens: 150'),
215+
botCode.includes('max_new_tokens: 256'),
216216
'Limits response length'
217217
);
218218

0 commit comments

Comments (0)