6 changes: 3 additions & 3 deletions .vitepress/theme/index.ts

```diff
@@ -19,9 +19,9 @@ import type {EnhanceAppContext} from "vitepress";
 export default {
     extends: Theme,
     Layout: () => {
-        const text = "DeepSeek R1 is here!";
-        const link = "/blog/v3.6-deepseek-r1";
-        const hideDate = new Date("2025-06-01T00:00:00Z");
+        const text = "gpt-oss is here!";
+        const link = "/blog/v3.12-gpt-oss";
+        const hideDate = new Date("2025-11-01T00:00:00Z");
 
         return h(LayoutContainer, null, h(Theme.Layout, null, {
             "home-hero-info-before": () => h(LatestVersionHomeBadge, {
```
2 changes: 1 addition & 1 deletion README.md

```diff
@@ -15,7 +15,7 @@
 
 </div>
 
-✨ [DeepSeek R1 is here!](https://node-llama-cpp.withcat.ai/blog/v3.6-deepseek-r1) ✨
+✨ [`gpt-oss` is here!](https://node-llama-cpp.withcat.ai/blog/v3.12-gpt-oss) ✨
 
 ## Features
 * Run LLMs locally on your machine
```
142 changes: 142 additions & 0 deletions docs/blog/v3.12-gpt-oss.md (new file)

---
title: gpt-oss is here!
date: 2025-08-09T18:00:00Z
lastUpdated: false
author:
name: Gilad S.
github: giladgd
category: Release
description: Learn how to use gpt-oss to its full potential with node-llama-cpp
image:
url: https://github.com/user-attachments/assets/df5f1f59-a2cd-4fdb-b60c-3214f4a1584b
alt: "node-llama-cpp + gpt-oss"
width: 3072
height: 1536
---
[`node-llama-cpp`](https://node-llama-cpp.withcat.ai) v3.12 is here, with full support for [`gpt-oss`](https://huggingface.co/openai/gpt-oss-20b) models!

---

## gpt-oss
[`gpt-oss`](https://huggingface.co/openai/gpt-oss-20b) comes in two flavors:
* [`gpt-oss-20b`](https://huggingface.co/openai/gpt-oss-20b) - 21B total parameters, 3.6B of them active
* [`gpt-oss-120b`](https://huggingface.co/openai/gpt-oss-120b) - 117B total parameters, 5.1B of them active

Here are a few highlights of these models:
* Since only a small fraction of their parameters are active for any given token, these models are very fast
* They are reasoning models, and you can adjust their reasoning effort
* They are very good at function calling and are built with agentic capabilities in mind
* They were trained with native MXFP4 precision, so there's no need to quantize them further; they're already small relative to their capabilities
* They are released under the Apache 2.0 license, so you can use them in your commercial applications


## Recommended Models
Here are some recommended model URIs you can use to try out `gpt-oss` right away:

| Model | Size | URI |
|--------------------------------------------------------------------|--------|-----------------------------------------------------------------------|
| [`gpt-oss-20b`](https://huggingface.co/giladgd/gpt-oss-20b-GGUF) | 12.1GB | `hf:giladgd/gpt-oss-20b-GGUF/gpt-oss-20b.MXFP4.gguf` |
| [`gpt-oss-120b`](https://huggingface.co/giladgd/gpt-oss-120b-GGUF) | 63.4GB | `hf:giladgd/gpt-oss-120b-GGUF/gpt-oss-120b.MXFP4-00001-of-00002.gguf` |

::: info TIP
[Estimate the compatibility](../cli/inspect/estimate.md) of a model with your machine before downloading it:
```shell
npx -y node-llama-cpp inspect estimate <model URI>
```
:::


### Try It Using the CLI
To quickly try out [`gpt-oss-20b`](https://huggingface.co/giladgd/gpt-oss-20b-GGUF), you can use the [CLI `chat` command](../cli/chat.md):

```shell
npx -y node-llama-cpp chat --ef --prompt "Hi there" hf:giladgd/gpt-oss-20b-GGUF/gpt-oss-20b.MXFP4.gguf
```


## Customizing gpt-oss
You can adjust `gpt-oss`'s responses by configuring the options of [`HarmonyChatWrapper`](../api/classes/HarmonyChatWrapper.md):
```typescript
import {
    getLlama, resolveModelFile, LlamaChatSession,
    HarmonyChatWrapper
} from "node-llama-cpp";

const modelUri = "hf:giladgd/gpt-oss-20b-GGUF/gpt-oss-20b.MXFP4.gguf";

const llama = await getLlama();
const model = await llama.loadModel({
    // downloads the model on first use and reuses the local file afterwards
    modelPath: await resolveModelFile(modelUri)
});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence(),
    chatWrapper: new HarmonyChatWrapper({
        modelIdentity: "You are ChatGPT, a large language model trained by OpenAI.",
        reasoningEffort: "high"
    })
});

const q1 = "What is the weather like in SF?";
console.log("User: " + q1);

const a1 = await session.prompt(q1);
console.log("AI: " + a1);
```
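
Since `gpt-oss` is a reasoning model, you may also want to watch its reasoning as it streams in. Here's a minimal sketch using the `onResponseChunk` option of `session.prompt()`, continuing from the `session` created above; it assumes `gpt-oss`'s reasoning is emitted as `"thought"` segments, like other reasoning models supported by `node-llama-cpp`:
```typescript
// continuing from the example above;
// assumes reasoning arrives as "thought" segments
const a2 = await session.prompt("How many R's are in \"strawberry\"?", {
    onResponseChunk(chunk) {
        if (chunk.type === "segment" && chunk.segmentStartTime != null)
            process.stdout.write(` [segment start: ${chunk.segmentType}] `);

        process.stdout.write(chunk.text);

        if (chunk.type === "segment" && chunk.segmentEndTime != null)
            process.stdout.write(` [segment end: ${chunk.segmentType}] `);
    }
});
```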

### Using Function Calling
`gpt-oss` models have great support for function calling.
However, these models don't support parallel function calling, so only one function will be called at a time.

```typescript
import {
    getLlama, resolveModelFile, LlamaChatSession,
    defineChatSessionFunction
} from "node-llama-cpp";

const modelUri = "hf:giladgd/gpt-oss-20b-GGUF/gpt-oss-20b.MXFP4.gguf";

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: await resolveModelFile(modelUri)
});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

const functions = {
    getCurrentWeather: defineChatSessionFunction({
        description: "Gets the current weather in the provided location.",
        params: {
            type: "object",
            properties: {
                location: {
                    type: "string",
                    description: "The city and state, e.g. San Francisco, CA"
                },
                format: {
                    enum: ["celsius", "fahrenheit"]
                }
            }
        },
        handler({location, format}) {
            console.log(`Getting current weather for "${location}" in ${format}`);

            return {
                // simulate a weather API response
                temperature: format === "celsius" ? 20 : 68,
                format
            };
        }
    })
};

const q1 = "What is the weather like in SF?";
console.log("User: " + q1);

const a1 = await session.prompt(q1, {functions});
console.log("AI: " + a1);
```
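
The two examples above compose: you can give the session a configured `HarmonyChatWrapper` and still pass `functions` when prompting. A minimal sketch, reusing `model` and `functions` from the example above (with `HarmonyChatWrapper` added to the imports):
```typescript
// a sketch combining HarmonyChatWrapper options with function calling;
// reuses `model` and `functions` from the example above
const context2 = await model.createContext();
const session2 = new LlamaChatSession({
    contextSequence: context2.getSequence(),
    chatWrapper: new HarmonyChatWrapper({reasoningEffort: "low"})
});

const a2 = await session2.prompt("Should I take an umbrella to SF today?", {functions});
console.log("AI: " + a2);
```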