fix: Qwen3 memory estimation

giladgd · giladgd · commit 9f8d27b3e388 · 2025-08-09T20:19:47.000+03:00
diff --git a/docs/blog/v3.12-gpt-oss.md b/docs/blog/v3.12-gpt-oss.md
@@ -1,6 +1,6 @@
 ---
 title: gpt-oss is here!
-date: 2025-08-09T15:00:00Z
+date: 2025-08-09T18:00:00Z
 lastUpdated: false
 author:
     name: Gilad S.
diff --git a/src/gguf/insights/GgufInsights.ts b/src/gguf/insights/GgufInsights.ts
@@ -310,6 +310,8 @@ export class GgufInsights {
                 //         )
                 //     );
                 // }
+            } else if (this._ggufFileInfo.metadata.general?.architecture === GgufArchitectureType.qwen3) {
+                return int32TBytes * batchSize * (embeddingLength + (kvSize * headCount));
             } else if (expertCount > 0) {
                 const expertsUsedCount = this._ggufFileInfo.architectureMetadata.expert_used_count ?? 2;