
Commit 902306f

fix(gpt-bot): Use onnx-community models for Transformers.js compatibility
- Replace HuggingFaceTB/SmolLM2-xxx-Instruct with onnx-community models
- SmolLM2 135M/360M ONNX versions work correctly with Transformers.js v3
- Add Qwen2.5-0.5B-Instruct as largest model option (replaces 1.7B)
- Use q4f16 quantization for optimal browser performance
- Update HTML to reflect new model specs and links
- Update tests for new model configuration

The original HuggingFaceTB models have ONNX files but aren't properly
structured for Transformers.js browser compatibility, causing load failures.
1 parent ee0c8cd commit 902306f
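For reference, loading one of these onnx-community models in the browser with Transformers.js v3 looks roughly like the sketch below. This is a minimal illustration, not the demo's actual code; the prompt, generation options, and progress logging are placeholders.

import { pipeline } from '@huggingface/transformers';

// Smallest model from this commit, with the q4f16 quantization it
// configures (~111 MB download). progress_callback is optional.
const generator = await pipeline(
  'text-generation',
  'onnx-community/SmolLM2-135M-Instruct-ONNX',
  { dtype: 'q4f16', progress_callback: (p) => console.log(p.status) }
);

// Instruct models expect chat-formatted messages.
const messages = [{ role: 'user', content: 'Say hello in five words.' }];
const output = await generator(messages, { max_new_tokens: 32 });

// With message input, generated_text is the full message list;
// the last entry is the assistant's reply.
console.log(output[0].generated_text.at(-1).content);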

3 files changed: +54 -54 lines changed

demos/chatbot-evolution/index.html

Lines changed: 16 additions & 16 deletions
@@ -455,11 +455,11 @@ <h2>2020s: GPT & Transformers</h2>
 
 <div class="chatbot-info">
   <div class="info-card">
-    <h3>About SmolLM2</h3>
-    <p><strong>Creator:</strong> HuggingFace (2024)</p>
+    <h3>About Modern LLMs</h3>
+    <p><strong>Models:</strong> SmolLM2 (135M, 360M) & Qwen2.5 (0.5B)</p>
     <p><strong>Method:</strong> Decoder-only transformer with chat templates</p>
-    <p><strong>Models:</strong> 135M, 360M, 1.7B parameters</p>
     <p><strong>Innovation:</strong> Optimized for browser/edge deployment</p>
+    <p><strong>Source:</strong> onnx-community (Transformers.js compatible)</p>
   </div>
 
   <div class="info-card">
@@ -522,7 +522,7 @@ <h4>Decoder-Only Transformer (SmolLM2)</h4>
     <div class="sub-block">Grouped-Query Attention</div>
     <div class="sub-block">SwiGLU FFN</div>
     <div class="sub-block">RMSNorm</div>
-    <div class="block-note">x9 (135M) / x16 (360M) / x24 (1.7B) layers</div>
+    <div class="block-note">x9 (135M) / x16 (360M) / x24 (Qwen 0.5B) layers</div>
   </div>
 </div>
 <div class="arch-arrow">&#8595;</div>
@@ -556,21 +556,21 @@ <h5>Quantization (q4)</h5>
 </div>
 
 <div class="model-specs">
-  <h4>SmolLM2 Model Family</h4>
+  <h4>Available Models</h4>
   <table class="specs-table">
-    <tr><th>Spec</th><th>135M</th><th>360M</th><th>1.7B</th></tr>
-    <tr><td>Parameters</td><td>135 Million</td><td>360 Million</td><td>1.7 Billion</td></tr>
+    <tr><th>Spec</th><th>SmolLM2 135M</th><th>SmolLM2 360M</th><th>Qwen2.5 0.5B</th></tr>
+    <tr><td>Parameters</td><td>135 Million</td><td>360 Million</td><td>494 Million</td></tr>
     <tr><td>Layers</td><td>9</td><td>16</td><td>24</td></tr>
-    <tr><td>Hidden Size</td><td>576</td><td>960</td><td>2048</td></tr>
-    <tr><td>Download (q4)</td><td>~85 MB</td><td>~210 MB</td><td>~1.4 GB</td></tr>
-    <tr><td>Min RAM</td><td>2 GB</td><td>4 GB</td><td>8 GB</td></tr>
-    <tr><td>Context Length</td><td colspan="3">8,192 tokens</td></tr>
+    <tr><td>Hidden Size</td><td>576</td><td>960</td><td>896</td></tr>
+    <tr><td>Download (q4f16)</td><td>~111 MB</td><td>~259 MB</td><td>~460 MB</td></tr>
+    <tr><td>Min RAM</td><td>2 GB</td><td>4 GB</td><td>4 GB</td></tr>
+    <tr><td>Context Length</td><td colspan="2">8,192 tokens</td><td>32,768 tokens</td></tr>
   </table>
   <div class="model-note">
     <strong>Model Links:</strong>
-    <a href="https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct" target="_blank">135M</a> |
-    <a href="https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct" target="_blank">360M</a> |
-    <a href="https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct" target="_blank">1.7B</a>
+    <a href="https://huggingface.co/onnx-community/SmolLM2-135M-Instruct-ONNX" target="_blank">SmolLM2 135M</a> |
+    <a href="https://huggingface.co/onnx-community/SmolLM2-360M-Instruct-ONNX" target="_blank">SmolLM2 360M</a> |
+    <a href="https://huggingface.co/onnx-community/Qwen2.5-0.5B-Instruct" target="_blank">Qwen2.5 0.5B</a>
   </div>
 </div>
 </div>
@@ -639,8 +639,8 @@ <h3>Evolution Stats</h3>
   <span class="stat-value">90M params</span>
 </div>
 <div class="stat-item">
-  <span class="stat-label">SmolLM2 (2024)</span>
-  <span class="stat-value">135M-1.7B params</span>
+  <span class="stat-label">SmolLM2/Qwen (2024)</span>
+  <span class="stat-value">135M-500M params</span>
 </div>
 <div class="stat-item">
 <span class="stat-label">Claude 4 Opus (2025)</span>

demos/chatbot-evolution/js/gpt-bot.js

Lines changed: 26 additions & 26 deletions
@@ -1,14 +1,16 @@
 /**
  * GPT-style Bot (2020s) - Modern LLM via Transformers.js v3
  *
- * Model hierarchy (SmolLM2 family - HuggingFace's browser-optimized models):
- * 1. SmolLM2-135M-Instruct - Ultra-light, works on any device
- * 2. SmolLM2-360M-Instruct - Balanced quality/speed (default for 4GB RAM)
- * 3. SmolLM2-1.7B-Instruct - Best quality (requires 8GB+ RAM + WebGPU)
+ * Model hierarchy (using onnx-community models for Transformers.js compatibility):
+ * 1. SmolLM2-135M-Instruct-ONNX - Ultra-light (~110MB), works on any device
+ * 2. SmolLM2-360M-Instruct-ONNX - Balanced quality/speed (~260MB)
+ * 3. Qwen2.5-0.5B-Instruct - Best quality (~460MB, requires 4GB+ RAM)
+ *
+ * IMPORTANT: Uses onnx-community models which are specifically exported and
+ * tested for Transformers.js browser compatibility. The original HuggingFaceTB
+ * models may have ONNX files but aren't guaranteed to work with Transformers.js.
  *
  * Auto-selects based on device RAM and WASM memory limits.
- * When WebGPU is available, larger models become feasible since weights
- * go to GPU memory, bypassing WASM heap limits.
  */
 
 export class GPTBot {
@@ -96,41 +98,39 @@ export class GPTBot {
     this.onProgress = null;
     this.loadAttempt = 0;
 
-    // SmolLM2 family - all have native Transformers.js support (ONNX bundled)
-    // wasmMinMB: minimum WASM heap needed (weights + runtime overhead)
     this.models = [
       {
-        name: 'HuggingFaceTB/SmolLM2-135M-Instruct',
+        name: 'onnx-community/SmolLM2-135M-Instruct-ONNX',
         displayName: 'SmolLM2 135M',
-        dtype: 'q4',
+        dtype: 'q4f16',
         params: '135M',
-        sizeMB: 85,
-        wasmMinMB: 300, // 85MB weights + ~200MB runtime
+        sizeMB: 111,
+        wasmMinMB: 400,
         minRAM: 2,
         year: 2024,
         org: 'HuggingFace'
       },
       {
-        name: 'HuggingFaceTB/SmolLM2-360M-Instruct',
+        name: 'onnx-community/SmolLM2-360M-Instruct-ONNX',
         displayName: 'SmolLM2 360M',
-        dtype: 'q4',
+        dtype: 'q4f16',
         params: '360M',
-        sizeMB: 210,
-        wasmMinMB: 600, // 210MB weights + ~400MB runtime
+        sizeMB: 259,
+        wasmMinMB: 700,
         minRAM: 4,
         year: 2024,
         org: 'HuggingFace'
       },
       {
-        name: 'HuggingFaceTB/SmolLM2-1.7B-Instruct',
-        displayName: 'SmolLM2 1.7B',
-        dtype: 'q4',
-        params: '1.7B',
-        sizeMB: 1410,
-        wasmMinMB: 2500, // 1410MB weights + ~1GB runtime - exceeds most WASM limits
-        minRAM: 8,
+        name: 'onnx-community/Qwen2.5-0.5B-Instruct',
+        displayName: 'Qwen2.5 0.5B',
+        dtype: 'q4f16',
+        params: '0.5B',
+        sizeMB: 460,
+        wasmMinMB: 1200,
+        minRAM: 4,
         year: 2024,
-        org: 'HuggingFace'
+        org: 'Alibaba'
       }
     ];
 
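The doc comment above says the bot auto-selects a model from device RAM and WASM heap limits, but the selection code itself is outside this diff. A sketch of what such logic could look like against these model entries (hypothetical helper; the real gpt-bot.js may differ):

// Hypothetical sketch only - not the actual selection code in gpt-bot.js.
// Picks the largest model whose requirements fit the device.
function pickModel(models, wasmHeapLimitMB = 2048) {
  // navigator.deviceMemory reports approximate RAM in GB (Chromium-only,
  // capped at 8); fall back to a conservative guess elsewhere.
  const ramGB = navigator.deviceMemory ?? 4;
  return [...models].reverse().find(
    (m) => ramGB >= m.minRAM && m.wasmMinMB <= wasmHeapLimitMB
  ) ?? models[0]; // smallest model as last resort
}

// e.g. pickModel(this.models) on a 4 GB device with a 2 GB WASM heap would
// select Qwen2.5 0.5B (minRAM 4, wasmMinMB 1200 both fit).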

@@ -489,11 +489,11 @@ export class GPTBot {
     const specs = {
       'SmolLM2-135M': { layers: 9, hiddenSize: 576, attentionHeads: 9 },
       'SmolLM2-360M': { layers: 16, hiddenSize: 960, attentionHeads: 15 },
-      'SmolLM2-1.7B': { layers: 24, hiddenSize: 2048, attentionHeads: 32 }
+      'Qwen2.5-0.5B': { layers: 24, hiddenSize: 896, attentionHeads: 14 }
     };
 
     const modelKey = model.name.includes('135M') ? 'SmolLM2-135M' :
-      model.name.includes('360M') ? 'SmolLM2-360M' : 'SmolLM2-1.7B';
+      model.name.includes('360M') ? 'SmolLM2-360M' : 'Qwen2.5-0.5B';
     const spec = specs[modelKey];
 
     return {
tests/test-gpt-bot-loading.mjs

Lines changed: 12 additions & 12 deletions
@@ -54,18 +54,18 @@ async function testGPTBotStructure() {
   );
 
   runner.assert(
-    botCode.includes('SmolLM2-135M-Instruct'),
-    'SmolLM2 135M is configured as smallest model'
+    botCode.includes('SmolLM2-135M-Instruct-ONNX'),
+    'SmolLM2 135M ONNX is configured as smallest model'
   );
 
   runner.assert(
-    botCode.includes('SmolLM2-360M-Instruct'),
-    'SmolLM2 360M is configured as medium model'
+    botCode.includes('SmolLM2-360M-Instruct-ONNX'),
+    'SmolLM2 360M ONNX is configured as medium model'
   );
 
   runner.assert(
-    botCode.includes('SmolLM2-1.7B-Instruct'),
-    'SmolLM2 1.7B is configured as largest model'
+    botCode.includes('Qwen2.5-0.5B-Instruct'),
+    'Qwen2.5 0.5B is configured as largest model'
   );
 
   runner.assert(
@@ -74,8 +74,8 @@ async function testGPTBotStructure() {
   );
 
   runner.assert(
-    botCode.includes('dtype: \'q4\''),
-    'Uses q4 quantization for smaller model size'
+    botCode.includes('dtype: \'q4f16\''),
+    'Uses q4f16 quantization for smaller model size'
   );
 
   runner.assert(
@@ -139,8 +139,8 @@ async function testModelConfigurations() {
   runner.assertEqual(modelCount, 3, 'Has exactly 3 models configured');
 
   runner.assert(
-    modelsSection.includes('HuggingFaceTB/'),
-    'Uses HuggingFaceTB models (Transformers.js compatible)'
+    modelsSection.includes('onnx-community/'),
+    'Uses onnx-community models (Transformers.js compatible)'
   );
 
   runner.assert(
@@ -181,8 +181,8 @@ async function testArchitectureInfo() {
   );
 
   runner.assert(
-    botCode.includes('SmolLM2-1.7B'),
-    'Has SmolLM2-1.7B architecture info'
+    botCode.includes('Qwen2.5-0.5B'),
+    'Has Qwen2.5-0.5B architecture info'
  );
 
   runner.assert(
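Note these are structural tests: they read the bot source as text and assert on substrings rather than actually loading models. Roughly (a sketch; the runner and file-reading setup live elsewhere in the test file, outside this diff):

// Sketch of the pattern these assertions follow; 'runner' is the test
// harness used above, assumed here rather than defined.
import { readFile } from 'node:fs/promises';

const botCode = await readFile('demos/chatbot-evolution/js/gpt-bot.js', 'utf8');
runner.assert(
  botCode.includes("dtype: 'q4f16'"),
  'Uses q4f16 quantization for smaller model size'
);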
