  * Model hierarchy (SmolLM2 family - HuggingFace's browser-optimized models):
  * 1. SmolLM2-135M-Instruct - Ultra-light, works on any device
  * 2. SmolLM2-360M-Instruct - Balanced quality/speed (default for 4GB RAM)
- * 3. SmolLM2-1.7B-Instruct - Best quality (requires 8GB+ RAM)
+ * 3. SmolLM2-1.7B-Instruct - Best quality (requires 8GB+ RAM + WebGPU)
  *
- * Auto-selects based on device RAM (navigator.deviceMemory)
+ * Auto-selects based on device RAM and WASM memory limits.
+ * When WebGPU is available, larger models become feasible since weights
+ * go to GPU memory, bypassing WASM heap limits.
  */
 
 export class GPTBot {
+  // Cached capability detection (computed once)
+  static _wasmMaxMB = null;
+  static _webGPUAvailable = null;
+
+  /**
+   * Probe the maximum WASM memory available in this browser.
+   * Binary-searches for the largest maximum a WebAssembly.Memory can be
+   * created with.
+   * @returns {number} Maximum WASM memory in MB
+   */
+  static probeWasmMemory() {
+    if (GPTBot._wasmMaxMB !== null) {
+      return GPTBot._wasmMaxMB;
+    }
+
+    // Binary search for max allocatable WASM pages
+    // 1 page = 64 KiB, max theoretical = 65536 pages (4GB)
+    let min = 1;
+    let max = 65536; // 4GB theoretical max
+    let best = min;
+
+    while (min <= max) {
+      const mid = Math.floor((min + max) / 2);
+      try {
+        // Try to create a Memory with this maximum
+        new WebAssembly.Memory({ initial: 1, maximum: mid });
+        best = mid;
+        min = mid + 1;
+      } catch (e) {
+        max = mid - 1;
+      }
+    }
+
+    // Convert pages to MB (1 page = 64 KiB = 0.0625 MB)
+    GPTBot._wasmMaxMB = Math.floor((best * 64) / 1024);
+    console.log(`[GPT] Probed WASM memory limit: ${GPTBot._wasmMaxMB}MB (${best} pages)`);
+    return GPTBot._wasmMaxMB;
+  }
+
+  /**
+   * Check if WebGPU is available and functional.
+   * WebGPU allows larger models since weights go to GPU memory.
+   * @returns {Promise<boolean>}
+   */
+  static async checkWebGPU() {
+    if (GPTBot._webGPUAvailable !== null) {
+      return GPTBot._webGPUAvailable;
+    }
+
+    try {
+      if (!navigator.gpu) {
+        GPTBot._webGPUAvailable = false;
+        console.log('[GPT] WebGPU not supported in this browser');
+        return false;
+      }
+
+      const adapter = await navigator.gpu.requestAdapter();
+      if (!adapter) {
+        GPTBot._webGPUAvailable = false;
+        console.log('[GPT] WebGPU adapter not available');
+        return false;
+      }
+
+      const limits = adapter.limits;
+      const maxBufferSize = limits.maxBufferSize || 0;
+      const maxStorageBufferSize = limits.maxStorageBufferBindingSize || 0;
+
+      console.log(`[GPT] WebGPU available - maxBufferSize: ${Math.floor(maxBufferSize / 1024 / 1024)}MB, maxStorageBuffer: ${Math.floor(maxStorageBufferSize / 1024 / 1024)}MB`);
+      GPTBot._webGPUAvailable = true;
+      return true;
+    } catch (e) {
+      console.log('[GPT] WebGPU check failed:', e.message);
+      GPTBot._webGPUAvailable = false;
+      return false;
+    }
+  }
+
   constructor() {
     this.generator = null;
     this.isReady = false;
@@ -19,14 +97,16 @@ export class GPTBot {
     this.loadAttempt = 0;
 
     // SmolLM2 family - all have native Transformers.js support (ONNX bundled)
+    // wasmMinMB: minimum WASM heap needed (weights + runtime overhead)
     this.models = [
       {
         name: 'HuggingFaceTB/SmolLM2-135M-Instruct',
         displayName: 'SmolLM2 135M',
         dtype: 'q4',
         params: '135M',
         sizeMB: 85,
-        minRAM: 2, // Works on 2GB+ devices
+        wasmMinMB: 300, // 85MB weights + ~200MB runtime
+        minRAM: 2,
         year: 2024,
         org: 'HuggingFace'
       },
@@ -36,7 +116,8 @@ export class GPTBot {
         dtype: 'q4',
         params: '360M',
         sizeMB: 210,
-        minRAM: 4, // Recommended for 4GB+ devices
+        wasmMinMB: 600, // 210MB weights + ~400MB runtime
+        minRAM: 4,
         year: 2024,
         org: 'HuggingFace'
       },
@@ -46,6 +127,7 @@ export class GPTBot {
         dtype: 'q4',
         params: '1.7B',
         sizeMB: 1410,
+        wasmMinMB: 2500, // 1410MB weights + ~1GB runtime - exceeds most WASM limits
         minRAM: 8,
         year: 2024,
         org: 'HuggingFace'
@@ -60,23 +142,43 @@ export class GPTBot {
   }
 
   /**
-   * Detect device RAM and select the largest model that fits
-   * Uses 50% of available RAM as the threshold
+   * Detect device capabilities and select the best model.
+   *
+   * Selection logic:
+   * 1. Probe WASM memory limit
+   * 2. Check WebGPU availability (allows larger models)
+   * 3. Consider device RAM
+   * 4. Select largest model that fits all constraints
   */
   getDefaultModelIndex() {
     const deviceRAM = navigator.deviceMemory || 4;
+    const wasmMaxMB = GPTBot.probeWasmMemory();
+
+    // WebGPU check is async, so we read the cached result here;
+    // if WebGPU hasn't been checked yet, assume WASM-only for initial selection
+    const hasWebGPU = GPTBot._webGPUAvailable === true;
 
-    // Cap at 360M (index 1) - 1.7B model exceeds browser WASM memory limits
-    const maxSafeIndex = 1;
+    console.log(`[GPT] Capability detection: RAM=${deviceRAM}GB, WASM=${wasmMaxMB}MB, WebGPU=${hasWebGPU}`);
 
-    for (let i = Math.min(maxSafeIndex, this.models.length - 1); i >= 0; i--) {
-      if (deviceRAM >= this.models[i].minRAM) {
-        console.log(`[GPT] Detected ${deviceRAM}GB RAM, auto-selecting ${this.models[i].displayName}`);
-        return i;
+    for (let i = this.models.length - 1; i >= 0; i--) {
+      const model = this.models[i];
+
+      if (deviceRAM < model.minRAM) {
+        console.log(`[GPT] ${model.displayName}: skipped (needs ${model.minRAM}GB RAM, have ${deviceRAM}GB)`);
+        continue;
+      }
+
+      // WebGPU bypasses WASM heap limits by loading weights to GPU memory
+      if (!hasWebGPU && wasmMaxMB < model.wasmMinMB) {
+        console.log(`[GPT] ${model.displayName}: skipped (needs ${model.wasmMinMB}MB WASM, have ${wasmMaxMB}MB)`);
+        continue;
       }
+
+      console.log(`[GPT] Auto-selecting ${model.displayName} (RAM: ${deviceRAM}GB, WASM: ${wasmMaxMB}MB, WebGPU: ${hasWebGPU})`);
+      return i;
     }
 
-    console.log(`[GPT] Low RAM (${deviceRAM}GB), using smallest model`);
+    console.log(`[GPT] Falling back to smallest model (limited resources)`);
     return 0;
   }
 
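Because `checkWebGPU()` is async while `getDefaultModelIndex()` only reads the cached `_webGPUAvailable` flag, the 1.7B model can only be selected once the WebGPU probe has completed. A minimal sketch of that ordering; the call site below is hypothetical and not part of this diff:

```js
// Hypothetical call site: run the async WebGPU probe before selecting a model,
// so getDefaultModelIndex() sees the cached result instead of assuming WASM-only.
import { GPTBot } from './gptbot.js'; // assumed module path

await GPTBot.checkWebGPU();             // populates GPTBot._webGPUAvailable
const bot = new GPTBot();
const idx = bot.getDefaultModelIndex(); // RAM + WASM probe + cached WebGPU flag
console.log(`Selected model: ${bot.models[idx].displayName}`);
```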
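The header comment's point that WebGPU lets weights bypass the WASM heap only applies if the loader actually requests the GPU backend. A sketch under the assumption that the (unshown) load path uses Transformers.js's `pipeline()` with its `device`/`dtype` options; the `loadModel` helper is hypothetical:

```js
import { pipeline } from '@huggingface/transformers';

// Hypothetical loader: route weights to the GPU when WebGPU is available,
// otherwise fall back to the WASM backend (subject to the probed heap limit).
async function loadModel(model, hasWebGPU) {
  return pipeline('text-generation', model.name, {
    dtype: model.dtype,                     // 'q4' for every SmolLM2 entry above
    device: hasWebGPU ? 'webgpu' : 'wasm',
  });
}
```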