Skip to content

Commit 6a54163

Browse files
authored
docs: choosing an embedding model (#396)
1 parent 28c7984 commit 6a54163

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+222
-20
lines changed

.vitepress/config.ts

Lines changed: 55 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import {Resvg, initWasm as initResvgWasm, type ResvgRenderOptions} from "@resvg/
1616
import {BlogPageInfoPlugin} from "./config/BlogPageInfoPlugin.js";
1717
import {getApiReferenceSidebar} from "./config/apiReferenceSidebar.js";
1818
import {ensureLocalImage} from "./utils/ensureLocalImage.js";
19+
import {getExcerptFromMarkdownFile} from "./utils/getExcerptFromMarkdownFile.js";
1920
import type {Element as HastElement, Parent} from "hast";
2021

2122
import type {Node as UnistNode} from "unist";
@@ -28,6 +29,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
2829
const packageJson: typeof import("../package.json") = fs.readJsonSync(path.join(__dirname, "..", "package.json"));
2930
const env = envVar.from(process.env);
3031

32+
const docsDir = path.join(__dirname, "..", "docs");
3133
const urlBase = env.get("DOCS_URL_BASE")
3234
.asString();
3335
const packageVersion = env.get("DOCS_PACKAGE_VERSION")
@@ -66,9 +68,9 @@ function resolveHref(href: string, withDomain: boolean = false): string {
6668
}
6769

6870
const defaultImageMetaTags: HeadConfig[] = [
69-
["meta", {name: "og:image", content: socialPosterLink}],
70-
["meta", {name: "og:image:width", content: "4096"}],
71-
["meta", {name: "og:image:height", content: "2048"}],
71+
["meta", {property: "og:image", content: socialPosterLink}],
72+
["meta", {property: "og:image:width", content: "4096"}],
73+
["meta", {property: "og:image:height", content: "2048"}],
7274
["meta", {name: "twitter:image", content: socialPosterLink}],
7375
["meta", {name: "twitter:card", content: "summary_large_image"}]
7476
];
@@ -185,9 +187,9 @@ export default defineConfig({
185187
["link", {rel: "alternate", title: "Blog", type: "application/atom+xml", href: resolveHref("/blog/feed.atom", true)}],
186188
["meta", {name: "theme-color", content: "#cd8156"}],
187189
["meta", {name: "theme-color", content: "#dd773e", media: "(prefers-color-scheme: dark)"}],
188-
["meta", {name: "og:type", content: "website"}],
189-
["meta", {name: "og:locale", content: "en"}],
190-
["meta", {name: "og:site_name", content: "node-llama-cpp"}],
190+
["meta", {property: "og:type", content: "website"}],
191+
["meta", {property: "og:locale", content: "en"}],
192+
["meta", {property: "og:site_name", content: "node-llama-cpp"}],
191193
["script", {async: "", src: "https://www.googletagmanager.com/gtag/js?id=G-Q2SWE5Z1ST"}],
192194
[
193195
"script",
@@ -198,8 +200,10 @@ export default defineConfig({
198200
["style", {}]
199201
],
200202
async transformHead({pageData, head}) {
203+
let description = pageData.description;
201204
if (pageData.filePath === "index.md") {
202205
head.push(...defaultImageMetaTags);
206+
description ||= defaultPageDescription;
203207
} else if (pageData.relativePath === "404.md")
204208
head.push(...defaultImageMetaTags);
205209

@@ -209,7 +213,6 @@ export default defineConfig({
209213
]
210214
.filter(Boolean)
211215
.join(" - ") || defaultPageTitle;
212-
const description = pageData.description || defaultPageDescription;
213216

214217
if (pageData.filePath.startsWith("blog/") && pageData.frontmatter.image != null) {
215218
let imageDir = pageData.filePath;
@@ -220,7 +223,7 @@ export default defineConfig({
220223
const coverImage = await ensureLocalImage(pageData.frontmatter.image, "cover", {
221224
baseDestLocation: imageDir.split("/")
222225
});
223-
head.push(["meta", {name: "og:image", content: resolveHref(coverImage.urlPath.absolute, true)}]);
226+
head.push(["meta", {property: "og:image", content: resolveHref(coverImage.urlPath.absolute, true)}]);
224227
} else if (typeof pageData.frontmatter.image === "object") {
225228
const coverImage = typeof pageData.frontmatter.image.url === "string"
226229
? await ensureLocalImage(pageData.frontmatter.image.url, "cover", {
@@ -230,28 +233,53 @@ export default defineConfig({
230233

231234
if (typeof pageData.frontmatter.image.url === "string")
232235
head.push(["meta", {
233-
name: "og:image",
236+
property: "og:image",
234237
content: resolveHref(coverImage?.urlPath.absolute ?? pageData.frontmatter.image.url, true)
235238
}]);
236239

237240
if (pageData.frontmatter.image.width != null)
238241
head.push(["meta", {
239-
name: "og:image:width",
242+
property: "og:image:width",
240243
content: String(coverImage?.width ?? pageData.frontmatter.image.width)
241244
}]);
242245

243246
if (pageData.frontmatter.image.height != null)
244247
head.push(["meta", {
245-
name: "og:image:height",
248+
property: "og:image:height",
246249
content: String(coverImage?.height ?? pageData.frontmatter.image.height)
247250
}]);
248251
}
249252
}
250253

251-
head.push(["meta", {name: "og:title", content: title}]);
252-
head.push(["meta", {name: "og:description", content: description}]);
254+
const markdownFilePath = path.join(docsDir, pageData.filePath);
255+
if ((description == null || description === "") && await fs.pathExists(markdownFilePath) && !pageData.filePath.startsWith("api/")) {
256+
const excerpt = await getExcerptFromMarkdownFile(await fs.readFile(markdownFilePath, "utf8"));
257+
if (excerpt != null && excerpt !== "")
258+
description = excerpt.replaceAll('"', "'").replaceAll("\n", " ");
259+
}
260+
261+
pageData.description = description;
262+
263+
if (description != null && description !== "" &&
264+
(pageData.frontmatter.description == null || pageData.frontmatter.description === "")
265+
) {
266+
pageData.frontmatter.description = description;
267+
for (let i = 0; i < head.length; i++) {
268+
const header = head[i]!;
269+
if (header[0] === "meta" && header[1]?.name === "description") {
270+
head[i] = ["meta", {name: "description", content: description}];
271+
break;
272+
}
273+
}
274+
}
275+
276+
head.push(["meta", {property: "og:title", content: title}]);
277+
if (description != null && description !== "")
278+
head.push(["meta", {property: "og:description", content: description}]);
279+
253280
head.push(["meta", {name: "twitter:title", content: title}]);
254-
head.push(["meta", {name: "twitter:description", content: description}]);
281+
if (description != null && description !== "")
282+
head.push(["meta", {name: "twitter:description", content: description}]);
255283
},
256284
transformPageData(pageData) {
257285
if (pageData.filePath.startsWith("api/")) {
@@ -307,7 +335,7 @@ export default defineConfig({
307335
plugins: [
308336
GitChangelog({
309337
repoURL: () => "https://github.com/withcatai/node-llama-cpp",
310-
cwd: path.join(__dirname, "..", "docs")
338+
cwd: docsDir
311339
}) as VitepressPlugin,
312340
GitChangelogMarkdownSection({
313341
exclude: (id) => (
@@ -703,19 +731,28 @@ export default defineConfig({
703731
return bDate.getTime() - aDate.getTime();
704732
});
705733

706-
for (const {url, excerpt, frontmatter, html} of blogPosts) {
707-
const ogImageElement = findElementInHtml(html, (element) => element.tagName === "meta" && element.properties?.name === "og:imag");
734+
for (const {url, frontmatter, html, src, excerpt: originalExcerpt} of blogPosts) {
735+
const ogImageElement = findElementInHtml(html, (element) => (
736+
element.tagName === "meta" && (element.properties?.name === "og:image" || element.properties?.property === "og:image")
737+
));
708738
const date = new Date(frontmatter.date);
709739
if (Number.isNaN(date.getTime()))
710740
throw new Error(`Invalid date for blog post: ${url}`);
711741
else if (frontmatter.title == null || frontmatter.title === "")
712742
throw new Error(`Invalid title for blog post: ${url}`);
713743

744+
let description: string | undefined = frontmatter.description;
745+
if ((description == null || description == "") && src != null)
746+
description = await getExcerptFromMarkdownFile(src);
747+
748+
if ((description == null || description === "") && originalExcerpt != null && originalExcerpt !== "")
749+
description = originalExcerpt;
750+
714751
feed.addItem({
715752
title: frontmatter.title,
716753
id: resolveHref(url, true),
717754
link: resolveHref(url, true),
718-
description: excerpt || frontmatter.description || undefined,
755+
description,
719756
content: html,
720757
author: [{
721758
name: frontmatter.author?.name,
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import {getMarkdownRenderer} from "./getMarkdownRenderer.js";
2+
3+
/**
 * Build a short plain-text excerpt from the raw content of a markdown file.
 *
 * The leading front matter block (`---` ... `---`) is dropped, a leading `# ` heading is optionally removed,
 * the remaining markdown is flattened to plain text, and the text is then cut at a word boundary
 * so the result is never longer than `maxLength` characters.
 *
 * @param markdownContent - raw markdown source, optionally starting with a front matter block
 * @param removeTitle - when `true`, a leading `# ` heading line is removed before the excerpt is generated
 * @param maxLength - maximum length of the returned excerpt
 * @returns the excerpt, or an empty string when there is no text left to extract
 */
export async function getExcerptFromMarkdownFile(
    markdownContent: string,
    removeTitle: boolean = true,
    maxLength: number = 80
): Promise<string> {
    const renderer = await getMarkdownRenderer();
    let content = markdownContent.trim().replaceAll("\r\n", "\n");

    if (content.startsWith("---")) {
        const frontMatterEndIndex = content.indexOf("\n---", "---".length);
        if (frontMatterEndIndex >= 0) {
            const nextNewLine = content.indexOf("\n", frontMatterEndIndex + "\n---".length);

            // no newline after the closing fence means that the file holds nothing but front matter,
            // so there's no body to take an excerpt from
            content = nextNewLine >= 0
                ? content.slice(nextNewLine + "\n".length).trim()
                : "";
        }
    }

    if (removeTitle && content.startsWith("# ")) {
        const nextNewLine = content.indexOf("\n");

        // a title without anything after it leaves an empty body, not the title itself
        content = nextNewLine >= 0
            ? content.slice(nextNewLine + "\n".length).trim()
            : "";
    }

    const renderedText = markdownToPlainText(renderer, content).trim();

    if (renderedText.length <= maxLength)
        return renderedText;

    // the cut lands exactly on a word boundary
    if (renderedText[maxLength] === " ")
        return renderedText.slice(0, maxLength).trimEnd();

    // otherwise back off to the last whole word that fits; hard-cut only when there's no space at all
    const lastSpaceIndex = renderedText.lastIndexOf(" ", maxLength);
    const cutIndex = lastSpaceIndex > 0
        ? lastSpaceIndex
        : maxLength;

    return renderedText.slice(0, cutIndex).trimEnd();
}
39+
40+
/**
 * Flatten markdown into plain text by walking the token stream returned by `markdownIt.parse`.
 *
 * Text is collected from `text`, `code_block`, `code_inline` and `emoji` tokens
 * (and from `fence` tokens when `includeCode` is set).
 * A single space separates consecutive blocks and replaces line breaks inside a paragraph.
 * Closing tokens of inline markup (bold, emphasis, links) add no space, so punctuation that
 * directly follows the markup stays attached to the word before it.
 *
 * @param markdownIt - the renderer whose `parse` method is used to tokenize the markdown
 * @param markdown - the markdown source to flatten
 * @param includeNotes - when `false`, `inline` tokens at nesting level 2 are skipped
 * (presumably the text of blockquotes and custom containers - verify against the renderer's plugins)
 * @param includeCode - when `true`, the content of `fence` tokens is included as well
 * @returns the collected text; it may end with a separator space, so callers should trim it
 */
function markdownToPlainText(
    markdownIt: Awaited<ReturnType<typeof getMarkdownRenderer>>,
    markdown: string,
    includeNotes: boolean = false,
    includeCode: boolean = false
) {
    const env = {};
    const pageTokens = markdownIt.parse(markdown, env);

    function toText(tokens: typeof pageTokens, insideInline: boolean = false) {
        let text = "";
        let lastWasSeparator = false;

        function append(value: string) {
            if (value === "")
                return;

            // the separator already provides the space between the two pieces of text
            if (lastWasSeparator && value.startsWith(" "))
                text += value.slice(" ".length);
            else
                text += value;

            lastWasSeparator = false;
        }

        function separate() {
            // never open the text with a space or stack several spaces on top of each other
            if (text !== "" && !text.endsWith(" "))
                text += " ";

            lastWasSeparator = true;
        }

        for (const token of tokens) {
            if (!includeNotes && token.type === "inline" && token.level === 2)
                continue;

            if (token.children != null)
                // only tokens of the block stream carry children, so everything below them is inline content
                append(toText(token.children, true));
            else if (
                ["text", "code_block", "code_inline", "emoji"].includes(token.type) ||
                (includeCode && ["fence"].includes(token.type))
            )
                append(token.content);
            else if (insideInline) {
                // a line break inside a paragraph separates words; closing inline markup does not
                if (token.type === "softbreak" || token.type === "hardbreak")
                    separate();
            } else if (token.type.endsWith("_close"))
                separate();
        }

        return text;
    }

    return toText(pageTokens);
}

docs/cli/chat.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
---
22
outline: deep
3+
description: "'chat' command reference"
34
---
45
# `chat` command
56

docs/cli/complete.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
---
22
outline: deep
3+
description: "'complete' command reference"
34
---
45
# `complete` command
56

docs/cli/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
---
22
outline: deep
3+
description: CLI commands reference
34
---
45
# CLI
56

docs/cli/infill.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
---
22
outline: deep
3+
description: "'infill' command reference"
34
---
45
# `infill` command
56

docs/cli/init.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
---
22
outline: deep
3+
description: "'init' command reference"
34
---
45
# `init` command
56

docs/cli/inspect.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
---
22
outline: deep
3+
description: "'inspect' command reference"
34
---
45
# `inspect` command
56

docs/cli/inspect/estimate.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
---
22
outline: deep
3+
description: "'inspect estimate' command reference"
34
---
45
# `inspect estimate` command
56

docs/cli/inspect/gguf.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
---
22
outline: deep
3+
description: "'inspect gguf' command reference"
34
---
45
# `inspect gguf` command
56

0 commit comments

Comments
 (0)