diff --git a/.vitepress/config.ts b/.vitepress/config.ts
index f9bba2ed..7ec54d35 100644
--- a/.vitepress/config.ts
+++ b/.vitepress/config.ts
@@ -16,6 +16,7 @@ import {Resvg, initWasm as initResvgWasm, type ResvgRenderOptions} from "@resvg/
 import {BlogPageInfoPlugin} from "./config/BlogPageInfoPlugin.js";
 import {getApiReferenceSidebar} from "./config/apiReferenceSidebar.js";
 import {ensureLocalImage} from "./utils/ensureLocalImage.js";
+import {getExcerptFromMarkdownFile} from "./utils/getExcerptFromMarkdownFile.js";
 
 import type {Element as HastElement, Parent} from "hast";
 import type {Node as UnistNode} from "unist";
@@ -28,6 +29,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
 const packageJson: typeof import("../package.json") = fs.readJsonSync(path.join(__dirname, "..", "package.json"));
 const env = envVar.from(process.env);
 
+const docsDir = path.join(__dirname, "..", "docs");
 const urlBase = env.get("DOCS_URL_BASE")
     .asString();
 const packageVersion = env.get("DOCS_PACKAGE_VERSION")
@@ -66,9 +68,9 @@ function resolveHref(href: string, withDomain: boolean = false): string {
 }
 
 const defaultImageMetaTags: HeadConfig[] = [
-    ["meta", {name: "og:image", content: socialPosterLink}],
-    ["meta", {name: "og:image:width", content: "4096"}],
-    ["meta", {name: "og:image:height", content: "2048"}],
+    ["meta", {property: "og:image", content: socialPosterLink}],
+    ["meta", {property: "og:image:width", content: "4096"}],
+    ["meta", {property: "og:image:height", content: "2048"}],
     ["meta", {name: "twitter:image", content: socialPosterLink}],
     ["meta", {name: "twitter:card", content: "summary_large_image"}]
 ];
@@ -185,9 +187,9 @@ export default defineConfig({
         ["link", {rel: "alternate", title: "Blog", type: "application/atom+xml", href: resolveHref("/blog/feed.atom", true)}],
         ["meta", {name: "theme-color", content: "#cd8156"}],
         ["meta", {name: "theme-color", content: "#dd773e", media: "(prefers-color-scheme: dark)"}],
-        ["meta", {name: "og:type", content: "website"}],
-        ["meta", {name: "og:locale", content: "en"}],
-        ["meta", {name: "og:site_name", content: "node-llama-cpp"}],
+        ["meta", {property: "og:type", content: "website"}],
+        ["meta", {property: "og:locale", content: "en"}],
+        ["meta", {property: "og:site_name", content: "node-llama-cpp"}],
         ["script", {async: "", src: "https://www.googletagmanager.com/gtag/js?id=G-Q2SWE5Z1ST"}],
         [
             "script",
@@ -198,8 +200,10 @@ export default defineConfig({
         ["style", {}]
     ],
     async transformHead({pageData, head}) {
+        let description = pageData.description;
         if (pageData.filePath === "index.md") {
             head.push(...defaultImageMetaTags);
+            description ||= defaultPageDescription;
         } else if (pageData.relativePath === "404.md")
             head.push(...defaultImageMetaTags);
 
@@ -209,7 +213,6 @@ export default defineConfig({
         ]
             .filter(Boolean)
             .join(" - ") || defaultPageTitle;
-        const description = pageData.description || defaultPageDescription;
 
         if (pageData.filePath.startsWith("blog/") && pageData.frontmatter.image != null) {
             let imageDir = pageData.filePath;
@@ -220,7 +223,7 @@ export default defineConfig({
                 const coverImage = await ensureLocalImage(pageData.frontmatter.image, "cover", {
                     baseDestLocation: imageDir.split("/")
                 });
-                head.push(["meta", {name: "og:image", content: resolveHref(coverImage.urlPath.absolute, true)}]);
+                head.push(["meta", {property: "og:image", content: resolveHref(coverImage.urlPath.absolute, true)}]);
             } else if (typeof pageData.frontmatter.image === "object") {
                 const coverImage = typeof pageData.frontmatter.image.url === "string" ?
                     await ensureLocalImage(pageData.frontmatter.image.url, "cover", {
@@ -230,28 +233,53 @@ export default defineConfig({
                 if (typeof pageData.frontmatter.image.url === "string")
                     head.push(["meta", {
-                        name: "og:image",
+                        property: "og:image",
                         content: resolveHref(coverImage?.urlPath.absolute ?? pageData.frontmatter.image.url, true)
                     }]);
                 if (pageData.frontmatter.image.width != null)
                     head.push(["meta", {
-                        name: "og:image:width",
+                        property: "og:image:width",
                         content: String(coverImage?.width ?? pageData.frontmatter.image.width)
                     }]);
                 if (pageData.frontmatter.image.height != null)
                     head.push(["meta", {
-                        name: "og:image:height",
+                        property: "og:image:height",
                         content: String(coverImage?.height ?? pageData.frontmatter.image.height)
                     }]);
             }
         }
 
-        head.push(["meta", {name: "og:title", content: title}]);
-        head.push(["meta", {name: "og:description", content: description}]);
+        const markdownFilePath = path.join(docsDir, pageData.filePath);
+        if ((description == null || description === "") && await fs.pathExists(markdownFilePath) && !pageData.filePath.startsWith("api/")) {
+            const excerpt = await getExcerptFromMarkdownFile(await fs.readFile(markdownFilePath, "utf8"));
+            if (excerpt != null && excerpt !== "")
+                description = excerpt.replaceAll('"', "'").replaceAll("\n", " ");
+        }
+
+        pageData.description = description;
+
+        if (description != null && description !== "" &&
+            (pageData.frontmatter.description == null || pageData.frontmatter.description === "")
+        ) {
+            pageData.frontmatter.description = description;
+            for (let i = 0; i < head.length; i++) {
+                const header = head[i]!;
+                if (header[0] === "meta" && header[1]?.name === "description") {
+                    head[i] = ["meta", {name: "description", content: description}];
+                    break;
+                }
+            }
+        }
+
+        head.push(["meta", {property: "og:title", content: title}]);
+        if (description != null && description !== "")
+            head.push(["meta", {property: "og:description", content: description}]);
+
         head.push(["meta", {name: "twitter:title", content: title}]);
-        head.push(["meta", {name: "twitter:description", content: description}]);
+        if (description != null && description !== "")
+            head.push(["meta", {name: "twitter:description", content: description}]);
     },
     transformPageData(pageData) {
         if (pageData.filePath.startsWith("api/")) {
@@ -307,7 +335,7 @@ export default defineConfig({
             plugins: [
                 GitChangelog({
                     repoURL: () => "https://github.com/withcatai/node-llama-cpp",
-                    cwd: path.join(__dirname, "..", "docs")
+                    cwd: docsDir
                 }) as VitepressPlugin,
                 GitChangelogMarkdownSection({
                     exclude: (id) => (
@@ -703,19 +731,28 @@ export default defineConfig({
                 return bDate.getTime() - aDate.getTime();
             });
 
-            for (const {url, excerpt, frontmatter, html} of blogPosts) {
-                const ogImageElement = findElementInHtml(html, (element) => element.tagName === "meta" && element.properties?.name === "og:imag");
+            for (const {url, frontmatter, html, src, excerpt: originalExcerpt} of blogPosts) {
+                const ogImageElement = findElementInHtml(html, (element) => (
+                    element.tagName === "meta" && (element.properties?.name === "og:image" || element.properties?.property === "og:image")
+                ));
                 const date = new Date(frontmatter.date);
                 if (Number.isNaN(date.getTime()))
                     throw new Error(`Invalid date for blog post: ${url}`);
                 else if (frontmatter.title == null || frontmatter.title === "")
                     throw new Error(`Invalid title for blog post: ${url}`);
 
+                let description: string | undefined = frontmatter.description;
+                if ((description == null || description === "") && src != null)
+                    description = await getExcerptFromMarkdownFile(src);
+
+                if ((description == null || description === "") && originalExcerpt != null && originalExcerpt !== "")
+                    description = originalExcerpt;
+
                 feed.addItem({
                     title: frontmatter.title,
                     id: resolveHref(url, true),
                     link: resolveHref(url, true),
-                    description: excerpt || frontmatter.description || undefined,
+                    description,
                     content: html,
                     author: [{
                         name: frontmatter.author?.name,
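Reviewer note: the net effect of the `transformHead` changes above is a fixed precedence for a page's description. A minimal sketch of that precedence — the type and function names here are illustrative, not part of the diff:

```ts
// Reviewer's sketch of the description precedence transformHead now applies.
// `PageLike` and `resolveDescription` are made-up names for illustration.
type PageLike = {
    frontmatterDescription?: string;
    markdownExcerpt?: string;
};

function resolveDescription(
    page: PageLike,
    isHomePage: boolean,
    defaultPageDescription: string
): string | undefined {
    // 1. An explicit `description` in the page's frontmatter always wins
    if (page.frontmatterDescription != null && page.frontmatterDescription !== "")
        return page.frontmatterDescription;

    // 2. Only the home page falls back to the site-wide default description
    if (isHomePage)
        return defaultPageDescription;

    // 3. Other pages derive an excerpt from their markdown content,
    //    normalized the same way the config does before emitting meta tags
    if (page.markdownExcerpt != null && page.markdownExcerpt !== "")
        return page.markdownExcerpt.replaceAll('"', "'").replaceAll("\n", " ");

    return undefined;
}
```

Pages with no frontmatter description and no usable excerpt (such as `api/` pages) end up with no `og:description`/`twitter:description` tags at all, rather than the old behavior of stamping every page with the site-wide default.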
diff --git a/.vitepress/utils/getExcerptFromMarkdownFile.ts b/.vitepress/utils/getExcerptFromMarkdownFile.ts
new file mode 100644
index 00000000..2cc57818
--- /dev/null
+++ b/.vitepress/utils/getExcerptFromMarkdownFile.ts
@@ -0,0 +1,83 @@
+import {getMarkdownRenderer} from "./getMarkdownRenderer.js";
+
+export async function getExcerptFromMarkdownFile(
+    markdownContent: string,
+    removeTitle: boolean = true,
+    maxLength: number = 80
+) {
+    const renderer = await getMarkdownRenderer();
+    let content = markdownContent.trim().replaceAll("\r\n", "\n");
+
+    if (content.startsWith("---")) {
+        const frontMatterEndIndex = content.indexOf("\n---", "---".length);
+        const nextNewLine = content.indexOf("\n", frontMatterEndIndex + "\n---".length);
+        if (frontMatterEndIndex >= 0 && nextNewLine >= 0)
+            content = content.slice(nextNewLine + 1).trim();
+    }
+
+    if (removeTitle && content.startsWith("# ")) {
+        const nextNewLine = content.indexOf("\n");
+        if (nextNewLine >= 0)
+            content = content.slice(nextNewLine + "\n".length).trim();
+    }
+
+    const renderedText = markdownToPlainText(renderer, content).trim();
+
+    if (renderedText.length > maxLength) {
+        if (renderedText[maxLength] === " ")
+            return renderedText.slice(0, maxLength);
+
+        const lastSpaceIndex = renderedText.lastIndexOf(" ", maxLength);
+        if (lastSpaceIndex >= 0)
+            return renderedText.slice(0, lastSpaceIndex);
+
+        return renderedText.slice(0, maxLength);
+    }
+
+    return renderedText;
+}
+
+function markdownToPlainText(
+    markdownIt: Awaited<ReturnType<typeof getMarkdownRenderer>>,
+    markdown: string,
+    includeNotes: boolean = false,
+    includeCode: boolean = false
+) {
+    const env = {};
+    const pageTokens = markdownIt.parse(markdown, env);
+
+    function toText(tokens: typeof pageTokens) {
+        let text = "";
+        let addedParagraphSpace = false;
+
+        for (const token of tokens) {
+            if (!includeNotes && token.type === "inline" && token.level === 2)
+                continue;
+
+            if (token.children != null) {
+                const childrenText = toText(token.children);
+                if (addedParagraphSpace && childrenText.startsWith(" "))
+                    text += childrenText.slice(" ".length);
+                else
+                    text += childrenText;
+            } else if (
+                ["text", "code_block", "code_inline", "emoji"].includes(token.type) ||
+                (includeCode && ["fence"].includes(token.type))
+            ) {
+                if (addedParagraphSpace && token.content.startsWith(" "))
+                    text += token.content.slice(" ".length);
+                else
+                    text += token.content;
+
+                addedParagraphSpace = false;
+            } else if (token.type.endsWith("_close")) {
+                text += " ";
+                addedParagraphSpace = true;
+            }
+        }
+
+        return text;
+    }
+
+    return toText(pageTokens);
+}
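Reviewer note: a usage sketch for the new utility. The input markdown is made up, and the printed excerpt is what the implementation above should produce assuming markdown-it's default tokenization and the 80-character default — an illustration, not a recorded run.

```ts
import {getExcerptFromMarkdownFile} from "./.vitepress/utils/getExcerptFromMarkdownFile.js";

const markdown = [
    "---",
    "outline: deep",
    "---",
    "# Using Batching",
    "Batching is the process of grouping multiple input sequences together to be processed simultaneously."
].join("\n");

// Front matter and the leading "# " title are stripped, the body is rendered
// to plain text, and the result is cut at a word boundary before 80 chars:
const excerpt = await getExcerptFromMarkdownFile(markdown);
console.log(excerpt);
// → "Batching is the process of grouping multiple input sequences together to be"
```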
diff --git a/docs/cli/chat.md b/docs/cli/chat.md
index 6cd12c31..bef15ec5 100644
--- a/docs/cli/chat.md
+++ b/docs/cli/chat.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: "'chat' command reference"
 ---
 
 # `chat` command
diff --git a/docs/cli/complete.md b/docs/cli/complete.md
index 6c060438..7ee50c55 100644
--- a/docs/cli/complete.md
+++ b/docs/cli/complete.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: "'complete' command reference"
 ---
 
 # `complete` command
diff --git a/docs/cli/index.md b/docs/cli/index.md
index a079a1d9..98f6dcb8 100644
--- a/docs/cli/index.md
+++ b/docs/cli/index.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: CLI commands reference
 ---
 
 # CLI
diff --git a/docs/cli/infill.md b/docs/cli/infill.md
index cf34f76e..dad931bd 100644
--- a/docs/cli/infill.md
+++ b/docs/cli/infill.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: "'infill' command reference"
 ---
 
 # `infill` command
diff --git a/docs/cli/init.md b/docs/cli/init.md
index 0c56f709..85523bb0 100644
--- a/docs/cli/init.md
+++ b/docs/cli/init.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: "'init' command reference"
 ---
 
 # `init` command
diff --git a/docs/cli/inspect.md b/docs/cli/inspect.md
index c95bf093..a8e98289 100644
--- a/docs/cli/inspect.md
+++ b/docs/cli/inspect.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: "'inspect' command reference"
 ---
 
 # `inspect` command
diff --git a/docs/cli/inspect/estimate.md b/docs/cli/inspect/estimate.md
index 90ab04dc..27b3e537 100644
--- a/docs/cli/inspect/estimate.md
+++ b/docs/cli/inspect/estimate.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: "'inspect estimate' command reference"
 ---
 
 # `inspect estimate` command
diff --git a/docs/cli/inspect/gguf.md b/docs/cli/inspect/gguf.md
index c8545fff..d2265e9b 100644
--- a/docs/cli/inspect/gguf.md
+++ b/docs/cli/inspect/gguf.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: "'inspect gguf' command reference"
 ---
 
 # `inspect gguf` command
diff --git a/docs/cli/inspect/gpu.md b/docs/cli/inspect/gpu.md
index 8d41e8d9..b020c273 100644
--- a/docs/cli/inspect/gpu.md
+++ b/docs/cli/inspect/gpu.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: "'inspect gpu' command reference"
 ---
 
 # `inspect gpu` command
diff --git a/docs/cli/inspect/measure.md b/docs/cli/inspect/measure.md
index 24e1dc7c..0f20b13b 100644
--- a/docs/cli/inspect/measure.md
+++ b/docs/cli/inspect/measure.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: "'inspect measure' command reference"
 ---
 
 # `inspect measure` command
diff --git a/docs/cli/pull.md b/docs/cli/pull.md
index 607ffa6e..7e8e12f5 100644
--- a/docs/cli/pull.md
+++ b/docs/cli/pull.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: "'pull' command reference"
 ---
 
 # `pull` command
diff --git a/docs/cli/source.md b/docs/cli/source.md
index d1872a34..69a7a865 100644
--- a/docs/cli/source.md
+++ b/docs/cli/source.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: "'source' command reference"
 ---
 
 # `source` command
diff --git a/docs/cli/source/build.md b/docs/cli/source/build.md
index 66e2f397..12b10a12 100644
--- a/docs/cli/source/build.md
+++ b/docs/cli/source/build.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: "'source build' command reference"
 ---
 
 # `source build` command
diff --git a/docs/cli/source/clear.md b/docs/cli/source/clear.md
index 1a882e54..e0b1a94f 100644
--- a/docs/cli/source/clear.md
+++ b/docs/cli/source/clear.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: "'source clear' command reference"
 ---
 
 # `source clear` command
diff --git a/docs/cli/source/download.md b/docs/cli/source/download.md
index 26d4f7cd..f9c829e4 100644
--- a/docs/cli/source/download.md
+++ b/docs/cli/source/download.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: "'source download' command reference"
 ---
 
 # `source download` command
diff --git a/docs/guide/CUDA.md b/docs/guide/CUDA.md
index 76a73d2b..07ee283d 100644
--- a/docs/guide/CUDA.md
+++ b/docs/guide/CUDA.md
@@ -1,5 +1,6 @@
 ---
 outline: [2, 3]
+description: CUDA support in node-llama-cpp
 ---
 # CUDA Support
 > CUDA is a parallel computing platform and API created by NVIDIA for NVIDIA GPUs
diff --git a/docs/guide/Metal.md b/docs/guide/Metal.md
index 5798e31b..871f1ef4 100644
--- a/docs/guide/Metal.md
+++ b/docs/guide/Metal.md
@@ -1,3 +1,6 @@
+---
+description: Metal support in node-llama-cpp
+---
 # Metal Support
 > Metal is a low-level 3D graphics and compute API created by Apple for Apple platforms
 
diff --git a/docs/guide/Vulkan.md b/docs/guide/Vulkan.md
index bfa710fb..224d3874 100644
--- a/docs/guide/Vulkan.md
+++ b/docs/guide/Vulkan.md
@@ -1,5 +1,6 @@
 ---
 outline: [2, 3]
+description: Vulkan support in node-llama-cpp
 ---
 # Using Vulkan
 > Vulkan is a low-overhead, cross-platform 3D graphics and computing API
diff --git a/docs/guide/awesome.md b/docs/guide/awesome.md
index 1e8df827..d9ce458d 100644
--- a/docs/guide/awesome.md
+++ b/docs/guide/awesome.md
@@ -1,3 +1,6 @@
+---
+description: Awesome projects that use node-llama-cpp
+---
 # Awesome `node-llama-cpp`
 Awesome projects that use `node-llama-cpp`.
 
diff --git a/docs/guide/batching.md b/docs/guide/batching.md
index db2799ce..e6b2249b 100644
--- a/docs/guide/batching.md
+++ b/docs/guide/batching.md
@@ -1,3 +1,6 @@
+---
+description: Using batching in node-llama-cpp
+---
 # Using Batching
 > Batching is the process of grouping multiple input sequences together to be processed simultaneously,
 > which improves computational efficiency and reduces overall inference times.
diff --git a/docs/guide/building-from-source.md b/docs/guide/building-from-source.md
index f29ac256..be8e695b 100644
--- a/docs/guide/building-from-source.md
+++ b/docs/guide/building-from-source.md
@@ -1,3 +1,6 @@
+---
+description: Building llama.cpp from source for node-llama-cpp
+---
 # Building From Source
 `node-llama-cpp` ships with pre-built binaries for macOS, Linux and Windows.
 
diff --git a/docs/guide/chat-session.md b/docs/guide/chat-session.md
index dce8ecd1..51b5372f 100644
--- a/docs/guide/chat-session.md
+++ b/docs/guide/chat-session.md
@@ -1,3 +1,6 @@
+---
+description: Chatting with a text generation model
+---
 # Using `LlamaChatSession`
 To chat with a text generation model, you can use the [`LlamaChatSession`](../api/classes/LlamaChatSession.md) class.
 
diff --git a/docs/guide/chat-wrapper.md b/docs/guide/chat-wrapper.md
index e01e94eb..3d023627 100644
--- a/docs/guide/chat-wrapper.md
+++ b/docs/guide/chat-wrapper.md
@@ -1,3 +1,6 @@
+---
+description: Chat with a model without having to worry about any parsing or formatting
+---
 # Chat Wrapper
 ## Background
 Text generation models are trained to predict the completion of incomplete text.
diff --git a/docs/guide/choosing-a-model.md b/docs/guide/choosing-a-model.md
index a27e3297..bae67a4c 100644
--- a/docs/guide/choosing-a-model.md
+++ b/docs/guide/choosing-a-model.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: Learn how to choose the right model for your use case
 ---
 # Choosing a Model
 ## About GGUF Model Files
@@ -83,7 +84,7 @@
 npx --no node-llama-cpp inspect estimate <model-file-url>
 ```
 :::
 
-### What do you need this model for? (chat, code completion, analyzing data, classification, etc.) {#model-purpose}
+### What do you need this model for? (chat, code completion, analyzing data, classification, embedding, etc.) {#model-purpose}
 There are plenty of models with different areas of expertise and capabilities.
 When you choose a model that is more specialized in the task you need it for, it will usually perform better than a general model.
@@ -111,6 +112,18 @@ Here are a few concepts to be aware of when choosing a model:
   you can either recognize the foundational model name and then assume that the rest is a fine-tune name,
   or you can open the model's page and read the model description.
 
+* **Embedding models** - models that are trained to convert text into [embeddings](./embedding.md) that capture the semantic meaning of the text.
+
+  Generating embeddings for similarity search using such models is preferable
+  because they are highly optimized for this task.
+  Embedding models are often significantly smaller (sometimes as small as 100MB), faster,
+  and consume less memory than general-purpose models, making them more efficient and practical.
+
+  While general-purpose models can also be used for generating embeddings,
+  they may not be as optimized or as efficient as embedding models for this task.
+
+  Many embedding models include terms like `embed` in their name.
+
 ### How much data do you plan to feed the model at once with?
 If you plan to feed the model with a lot of data at once, you'll need a model that supports a large context size.
 The larger the context size is, the more data the model can process at once.
diff --git a/docs/guide/contributing.md b/docs/guide/contributing.md
index 9f197529..33f06a59 100644
--- a/docs/guide/contributing.md
+++ b/docs/guide/contributing.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: Contributing to node-llama-cpp
 ---
 # Opening a PR on `node-llama-cpp`
 This document describes the guidelines of how to open a PR on the `node-llama-cpp` project.
diff --git a/docs/guide/development.md b/docs/guide/development.md
index c49ca48b..e67b2cc4 100644
--- a/docs/guide/development.md
+++ b/docs/guide/development.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: Developing node-llama-cpp
 ---
 # Developing `node-llama-cpp`
 This document describes how to set up your development environment to contribute to `node-llama-cpp`.
diff --git a/docs/guide/docker.md b/docs/guide/docker.md
index 3028a2a9..8bd8e331 100644
--- a/docs/guide/docker.md
+++ b/docs/guide/docker.md
@@ -1,5 +1,6 @@
 ---
 outline: [2, 4]
+description: Using node-llama-cpp in Docker
 ---
 # Using `node-llama-cpp` in Docker
 When using `node-llama-cpp` in a docker image to run it with [Docker](https://www.docker.com) or [Podman](https://podman.io), you will most likely want to use it together with a GPU for fast inference.
diff --git a/docs/guide/downloading-models.md b/docs/guide/downloading-models.md
index 9c33b096..da8cd3b7 100644
--- a/docs/guide/downloading-models.md
+++ b/docs/guide/downloading-models.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: Downloading models with node-llama-cpp
 ---
 # Downloading Models
 `node-llama-cpp` is equipped with solutions to download models to use them in your project.
diff --git a/docs/guide/electron.md b/docs/guide/electron.md
index 9fe74e0c..04984031 100644
--- a/docs/guide/electron.md
+++ b/docs/guide/electron.md
@@ -1,3 +1,6 @@
+---
+description: Using node-llama-cpp in Electron applications
+---
 # Using in Electron
 `node-llama-cpp` is fully supported in [Electron](https://www.electronjs.org), and also includes custom Electron-specific adaptations.
 
diff --git a/docs/guide/embedding.md b/docs/guide/embedding.md
index 3f0282f9..b8a672a8 100644
--- a/docs/guide/embedding.md
+++ b/docs/guide/embedding.md
@@ -1,5 +1,6 @@
 ---
 outline: [2, 4]
+description: Using embeddings with node-llama-cpp
 ---
 # Using Embedding
 ::: info What is an embedding?
@@ -23,6 +24,8 @@ Instead, we can embed all the documents once and then search for the most similar
 To do that, we embed all the documents in advance and store the embeddings in a database.
 Then, when a query comes in, we embed the query and search for the most similar embeddings in the database, and return the corresponding documents.
 
+Read the [choosing a model tutorial](./choosing-a-model.md) to learn how to choose the right model for your use case.
+
 ## Finding Relevant Documents
 Let's see an example of how we can embed 10 texts and then search for the most relevant one to a given query:
 ::: warning NOTE
@@ -41,7 +44,7 @@ const __dirname = path.dirname(
 
 const llama = await getLlama();
 const model = await llama.loadModel({
-    modelPath: path.join(__dirname, "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf")
+    modelPath: path.join(__dirname, "bge-small-en-v1.5-q8_0.gguf")
 });
 const context = await model.createEmbeddingContext();
 
@@ -108,6 +111,7 @@ console.log("Document:", topSimilarDocument);
 > query: What is the tallest mountain on Earth?
 > Document: Mount Everest is the tallest mountain in the world
 > ```
+> This example uses [bge-small-en-v1.5](https://huggingface.co/CompendiumLabs/bge-small-en-v1.5-gguf/blob/main/bge-small-en-v1.5-q8_0.gguf)
 
 ## Getting Raw Vectors {#raw-vector}
 To get the raw embedding vectors, you can use the [`vector`](../api/classes/LlamaEmbedding.md#vector) property of the [`LlamaEmbedding`](../api/classes/LlamaEmbedding.md) object:
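Reviewer note: a condensed sketch of the flow the updated embedding docs describe, using the same example model. It assumes the `LlamaEmbedding` API surface the docs link to (`getEmbeddingFor`, `calculateCosineSimilarity`) — worth double-checking against the API reference before relying on it.

```ts
import path from "path";
import {fileURLToPath} from "url";
import {getLlama} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

const llama = await getLlama();
const model = await llama.loadModel({
    // bge-small-en-v1.5 is a dedicated embedding model — small, fast,
    // and memory-efficient compared to a general-purpose LLM
    modelPath: path.join(__dirname, "bge-small-en-v1.5-q8_0.gguf")
});
const context = await model.createEmbeddingContext();

const query = await context.getEmbeddingFor("What is the tallest mountain on Earth?");
const document = await context.getEmbeddingFor("Mount Everest is the tallest mountain in the world");

// Higher cosine similarity → more semantically related texts
console.log(query.calculateCosineSimilarity(document));
```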
diff --git a/docs/guide/external-chat-state.md b/docs/guide/external-chat-state.md
index 67fde615..47c9deb3 100644
--- a/docs/guide/external-chat-state.md
+++ b/docs/guide/external-chat-state.md
@@ -1,3 +1,6 @@
+---
+description: Chat with a model and manage the chat state externally
+---
 # External Chat State
 ::: warning
 If you're not building a library around `node-llama-cpp`, you'd probably want to use the simpler [`LlamaChatSession`](../api/classes/LlamaChatSession.md); read more on the [chat session documentation](./chat-session.md).
diff --git a/docs/guide/function-calling.md b/docs/guide/function-calling.md
index b5950217..c8ead957 100644
--- a/docs/guide/function-calling.md
+++ b/docs/guide/function-calling.md
@@ -1,5 +1,6 @@
 ---
 outline: [2, 4]
+description: Using function calling
 ---
 
 # Using Function Calling
diff --git a/docs/guide/grammar.md b/docs/guide/grammar.md
index eadae8c3..bb2f4846 100644
--- a/docs/guide/grammar.md
+++ b/docs/guide/grammar.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: Using grammar
 ---
 # Using Grammar
 Use this to enforce a model to generate response in a specific format of text, like `JSON` for example.
diff --git a/docs/guide/index.md b/docs/guide/index.md
index 21e42fc5..ac218614 100644
--- a/docs/guide/index.md
+++ b/docs/guide/index.md
@@ -1,5 +1,6 @@
 ---
 outline: deep
+description: Get started with node-llama-cpp
 ---
 
 # Getting Started
diff --git a/docs/guide/llama-text.md b/docs/guide/llama-text.md
index c9b350a7..adf7f100 100644
--- a/docs/guide/llama-text.md
+++ b/docs/guide/llama-text.md
@@ -1,3 +1,6 @@
+---
+description: The basics of using LlamaText in node-llama-cpp
+---
 # Using LlamaText
 The [`LlamaText`](../api/classes/LlamaText.md) class is used to create content to be loaded into a model's context state
 without directly using the model's tokenizer for that.
diff --git a/docs/guide/objects-lifecycle.md b/docs/guide/objects-lifecycle.md
index 7fdbb06a..3d0db39d 100644
--- a/docs/guide/objects-lifecycle.md
+++ b/docs/guide/objects-lifecycle.md
@@ -1,5 +1,6 @@
 ---
 outline: [2, 3]
+description: Objects lifecycle in node-llama-cpp
 ---
 # Objects Lifecycle
 Every object in `node-llama-cpp` has a `.dispose()` function you can call to free up its resources.
diff --git a/docs/guide/text-completion.md b/docs/guide/text-completion.md
index 30e47e95..e94f6be9 100644
--- a/docs/guide/text-completion.md
+++ b/docs/guide/text-completion.md
@@ -1,3 +1,6 @@
+---
+description: Generating text completions with node-llama-cpp
+---
 # Text Completion {#title}
 To generate text completions, you can use the [`LlamaCompletion`](../api/classes/LlamaCompletion.md) class.
 
diff --git a/docs/guide/tips-and-tricks.md b/docs/guide/tips-and-tricks.md
index df3949e3..190741ff 100644
--- a/docs/guide/tips-and-tricks.md
+++ b/docs/guide/tips-and-tricks.md
@@ -1,3 +1,6 @@
+---
+description: Tips and tricks for using node-llama-cpp
+---
 # Tips and Tricks
 ## Flash Attention {#flash-attention}
 ::: warning Experimental Feature
diff --git a/docs/guide/token-bias.md b/docs/guide/token-bias.md
index 476d76c2..9dd007fb 100644
--- a/docs/guide/token-bias.md
+++ b/docs/guide/token-bias.md
@@ -1,3 +1,6 @@
+---
+description: Using token bias to adjust the probabilities of tokens in the generated response
+---
 # Using Token Bias {#title}
 ## Background {#background}
 To feed text into a language model,
diff --git a/docs/guide/tokens.md b/docs/guide/tokens.md
index dd203560..da61ccbf 100644
--- a/docs/guide/tokens.md
+++ b/docs/guide/tokens.md
@@ -1,3 +1,6 @@
+---
+description: The basics of working with tokens in node-llama-cpp
+---
 # Using Tokens
 `node-llama-cpp` provides you with a high-level API that abstracts dealing with tokens,
 so you may not even encounter a scenario where you have to deal with tokens directly.
diff --git a/docs/guide/troubleshooting.md b/docs/guide/troubleshooting.md
index d3bfbe8c..0899d733 100644
--- a/docs/guide/troubleshooting.md
+++ b/docs/guide/troubleshooting.md
@@ -1,5 +1,6 @@
 ---
 outline: [2, 3]
+description: Troubleshooting common issues with node-llama-cpp
 ---
 # Troubleshooting
 ## ESM Usage