Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changeset/legal-pots-nail.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"@speakeasy-api/docs-mcp-server": minor
"@speakeasy-api/docs-mcp-core": minor
"@speakeasy-api/docs-mcp-cli": minor
---

Extract human-readable titles for MCP resources from markdown files. Titles are resolved using frontmatter `title` with a fallback to the first H1 heading. These titles appear in MCP resource listings, making it easier for agents to identify documents.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
.vscode/mcp.json
.cursor/mcp.json
.codex/config.toml
node_modules/
dist/
.turbo/
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@
"prettier": "^3.8.1",
"turbo": "^2.4.4",
"typescript": "^5.9.3",
"vitest": "^3.0.9"
"vitest": "^4.0.17"
},
"engines": {
"node": ">=22.0.0"
}
}
}
10 changes: 10 additions & 0 deletions packages/cli/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import {
type EmbeddingMetadata,
type ManifestTaxonomyFieldConfig,
type IndexBuildStep,
type FileMeta,
type Manifest,
type PreviousIndexReader,
} from "@speakeasy-api/docs-mcp-core";
Expand Down Expand Up @@ -306,6 +307,7 @@ program

const chunks: Chunk[] = [];
const newFileFingerprints: Record<string, string> = {};
const filesMeta: Record<string, FileMeta> = {};
let chunkCacheHits = 0;
for (let fi = 0; fi < files.length; fi++) {
writeProgress(`Chunking [${fi + 1}/${files.length}]...`);
Expand All @@ -327,6 +329,10 @@ program
const fingerprint = computeChunkFingerprint(markdown, resolved.strategy, resolved.metadata);
newFileFingerprints[relative] = fingerprint;

if (resolved.title) {
filesMeta[relative] = { title: resolved.title };
}

if (previousIndex?.fingerprints.get(relative) === fingerprint) {
const cachedChunks = await previousIndex.getChunks(relative);
chunks.push(...cachedChunks);
Expand Down Expand Up @@ -497,6 +503,7 @@ program
embeddingMetadata,
sourceCommit,
taxonomyConfig,
filesMeta,
Object.keys(toolDescriptions).length > 0 ? toolDescriptions : undefined,
mcpServerInstructions,
);
Expand Down Expand Up @@ -662,6 +669,7 @@ function buildMetadata(
embedding: EmbeddingMetadata | null,
sourceCommit: string | null,
taxonomyConfig: Record<string, ManifestTaxonomyFieldConfig>,
filesMeta: Record<string, FileMeta>,
toolDescriptions?: Record<string, string>,
mcpServerInstructions?: string,
): {
Expand All @@ -685,6 +693,7 @@ function buildMetadata(
embedding: EmbeddingMetadata | null;
tool_descriptions?: Record<string, string>;
mcpServerInstructions?: string;
files?: Record<string, FileMeta>;
} {
const taxonomyValues = new Map<string, Set<string>>();
for (const chunk of chunks) {
Expand Down Expand Up @@ -727,6 +736,7 @@ function buildMetadata(
embedding,
...(toolDescriptions ? { tool_descriptions: toolDescriptions } : {}),
...(mcpServerInstructions ? { mcpServerInstructions } : {}),
...(filesMeta ? { files: filesMeta } : {}),
};
}

Expand Down
14 changes: 14 additions & 0 deletions packages/core/src/manifest.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import matter from "gray-matter";
import picomatch from "picomatch";
import { ManifestTaxonomyConfigSchema } from "./manifest-schema.js";
import { extractFirstH1 } from "./parser.js";
import type {
ChunkingStrategy,
Manifest,
Expand Down Expand Up @@ -147,6 +148,8 @@ export function resolveFileConfig(params: {
}
}

let title: string | undefined;

if (params.markdown) {
// HTML comment hints: applied after manifest but before frontmatter
const htmlHint = parseHtmlChunkingHint(params.markdown);
Expand All @@ -167,11 +170,14 @@ export function resolveFileConfig(params: {
if (frontmatterOverrides.strategy) {
strategy = frontmatterOverrides.strategy;
}

title = frontmatterOverrides.title ?? extractFirstH1(params.markdown);
}

return {
strategy,
metadata,
...(title ? { title } : {}),
};
}

Expand Down Expand Up @@ -201,6 +207,7 @@ function toPosixPath(value: string): string {
function parseFrontmatterOverrides(markdown: string): {
strategy?: ChunkingStrategy;
metadata?: Record<string, string>;
title?: string;
} {
const parsed = matter(markdown);
if (!parsed.data || typeof parsed.data !== "object") {
Expand Down Expand Up @@ -229,16 +236,23 @@ function parseFrontmatterOverrides(markdown: string): {
};
}

const title =
typeof data.title === "string" && data.title.trim() ? data.title.trim() : undefined;

const result: {
strategy?: ChunkingStrategy;
metadata?: Record<string, string>;
title?: string;
} = {};
if (strategy) {
result.strategy = strategy;
}
if (metadata) {
result.metadata = metadata;
}
if (title) {
result.title = title;
}
return result;
}

Expand Down
41 changes: 41 additions & 0 deletions packages/core/src/metadata.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import semver from "semver";
import type {
CorpusMetadata,
EmbeddingMetadata,
FileMeta,
TaxonomyField,
TaxonomyValueProperties,
ToolDescriptions,
Expand Down Expand Up @@ -58,13 +59,15 @@ export function normalizeMetadata(
const embedding = normalizeEmbedding(metadata.embedding);
const toolDescriptions = normalizeToolDescriptions(metadata.tool_descriptions);
const mcpServerInstructions = normalizeInstructions(metadata.mcpServerInstructions);
const files = normalizeFiles(metadata.files);

return {
metadata_version: metadataVersion,
corpus_description: corpusDescription,
taxonomy,
stats,
embedding,
files,
...(toolDescriptions ? { tool_descriptions: toolDescriptions } : {}),
...(mcpServerInstructions ? { mcpServerInstructions } : {}),
};
Expand Down Expand Up @@ -257,6 +260,44 @@ function normalizeInstructions(value: unknown): string | undefined {
return asNonEmptyString(value, "mcpServerInstructions");
}

/**
 * Validate and normalize the optional `files` section of raw corpus metadata.
 *
 * @param value - Raw `files` value; `null` and `undefined` are treated as absent.
 * @returns A map from file path to its normalized metadata. Every key of the
 *   input gets an entry in the output, even when it carries no usable title.
 * @throws Error when `value` is not a non-array object, when an entry is not a
 *   non-array object, or when an entry's `title` is present but not a string.
 */
function normalizeFiles(value: unknown): Record<string, FileMeta> {
  if (value === null || value === undefined) {
    return {};
  }

  if (typeof value !== "object" || Array.isArray(value)) {
    throw new Error("files must be an object");
  }

  const raw = value as Record<string, unknown>;
  const result: Record<string, FileMeta> = {};

  for (const [filepath, entry] of Object.entries(raw)) {
    if (!entry || typeof entry !== "object" || Array.isArray(entry)) {
      throw new Error(`files['${filepath}'] must be an object`);
    }

    const meta = entry as Record<string, unknown>;
    const fileMeta: FileMeta = {};

    if (meta.title !== undefined) {
      if (typeof meta.title !== "string") {
        throw new Error(`files['${filepath}'].title must be a string`);
      }
      // Whitespace-only titles are dropped rather than stored as empty strings.
      const trimmed = meta.title.trim();
      if (trimmed) {
        fileMeta.title = trimmed;
      }
    }

    result[filepath] = fileMeta;
  }

  return result;
}

function asTrimmedString(value: unknown): string {
if (typeof value !== "string") {
throw new Error("expected string");
Expand Down
16 changes: 16 additions & 0 deletions packages/core/src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import remarkParse from "remark-parse";
import remarkFrontmatter from "remark-frontmatter";
import remarkGfm from "remark-gfm";
import type { Root } from "mdast";
import { toString } from "mdast-util-to-string";

const processor = unified().use(remarkParse).use(remarkFrontmatter, ["yaml"]).use(remarkGfm);

Expand All @@ -18,3 +19,18 @@ const processor = unified().use(remarkParse).use(remarkFrontmatter, ["yaml"]).us
export function parseMarkdown(content: string): Root {
  // Parse only; the shared processor is not asked to run transforms here.
  const tree = processor.parse(content);
  return tree;
}

/**
 * Find the first top-level H1 heading in a markdown document and return its
 * plain text, trimmed.
 *
 * Only direct children of the parsed root are inspected. If the first H1 has
 * no text, the result is `undefined`; later H1 headings are not consulted.
 *
 * @param markdown - Markdown source to scan.
 * @returns The trimmed heading text, or `undefined` when there is no H1 or the
 *   first H1 is empty.
 */
export function extractFirstH1(markdown: string): string | undefined {
  const firstH1 = processor
    .parse(markdown)
    .children.find((child) => child.type === "heading" && child.depth === 1);

  if (firstH1 === undefined) {
    return undefined;
  }

  const headingText = toString(firstH1).trim();
  return headingText === "" ? undefined : headingText;
}
6 changes: 6 additions & 0 deletions packages/core/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,17 @@ export interface ToolDescriptions {
get_doc?: string;
}

/**
 * Metadata recorded for a single source file in the corpus metadata.
 */
export interface FileMeta {
/** Human-readable title for the file; omitted when none was resolved. */
title?: string;
}

/**
 * Normalized corpus metadata, as produced by metadata normalization.
 */
export interface CorpusMetadata {
metadata_version: string;
corpus_description: string;
taxonomy: Record<string, TaxonomyField>;
stats: MetadataStats;
embedding: EmbeddingMetadata | null;
/** Per-file metadata keyed by file path; an empty object when the raw metadata has no `files`. */
files: Record<string, FileMeta>;
tool_descriptions?: ToolDescriptions;
mcpServerInstructions?: string;
}
Expand All @@ -156,6 +161,7 @@ export interface BuildChunksInput {
/**
 * Result of resolving the effective configuration for one file.
 */
export interface ResolvedFileConfig {
strategy: ChunkingStrategy;
metadata: Record<string, string>;
/**
 * Title taken from the frontmatter `title`, falling back to the first H1
 * heading. Omitted when neither yields a title or no markdown was supplied.
 */
title?: string;
}

export interface EmbeddingConfig {
Expand Down
49 changes: 49 additions & 0 deletions packages/core/test/manifest.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,55 @@ describe("mergeTaxonomyConfigs", () => {
});
});

describe("resolveFileConfig title resolution", () => {
  // Every case resolves the same path; only the markdown payload varies.
  const relativeFilePath = "guide.md";
  const titleFor = (markdown: string) => resolveFileConfig({ relativeFilePath, markdown }).title;

  it("prefers frontmatter title over H1 heading", () => {
    const lines = ["---", "title: Frontmatter Title", "---", "# H1 Title", "Body text"];

    expect(titleFor(lines.join("\n"))).toBe("Frontmatter Title");
  });

  it("falls back to H1 heading when no frontmatter title", () => {
    const lines = ["# Getting Started", "", "Some content here."];

    expect(titleFor(lines.join("\n"))).toBe("Getting Started");
  });

  it("returns no title when neither frontmatter title nor H1 exist", () => {
    const lines = ["## Only H2", "", "Some content."];

    expect(titleFor(lines.join("\n"))).toBeUndefined();
  });

  it("returns no title when no markdown is provided", () => {
    // Called directly so the `markdown` key is absent rather than undefined.
    const { title } = resolveFileConfig({ relativeFilePath });

    expect(title).toBeUndefined();
  });
});

describe("parseManifest mcpServerInstructions", () => {
it("parses mcpServerInstructions when present", () => {
const manifest = parseManifest({
Expand Down
34 changes: 34 additions & 0 deletions packages/core/test/parser.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { describe, expect, it } from "vitest";
import { extractFirstH1 } from "../src/parser.js";

describe("extractFirstH1", () => {
  it("returns the text of the first H1 heading", () => {
    const title = extractFirstH1("# Hello World\n\nSome content");
    expect(title).toBe("Hello World");
  });

  it("returns undefined when there is no H1", () => {
    const title = extractFirstH1("## Second level\n\nSome content");
    expect(title).toBeUndefined();
  });

  it("skips YAML frontmatter and finds the H1", () => {
    const lines = ["---", "key: value", "---", "# My Title", "Body text"];
    const title = extractFirstH1(lines.join("\n"));
    expect(title).toBe("My Title");
  });

  it("returns the first H1 when there are multiple", () => {
    const lines = ["# First", "Some text", "# Second"];
    const title = extractFirstH1(lines.join("\n"));
    expect(title).toBe("First");
  });

  it("returns undefined for an empty H1", () => {
    const title = extractFirstH1("#");
    expect(title).toBeUndefined();
  });

  it("returns undefined for empty input", () => {
    const title = extractFirstH1("");
    expect(title).toBeUndefined();
  });

  it("handles inline formatting in H1", () => {
    // Inline markup is flattened to its text content.
    const title = extractFirstH1("# Hello **bold** world");
    expect(title).toBe("Hello bold world");
  });
});
1 change: 1 addition & 0 deletions packages/server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
"dependencies": {
"@modelcontextprotocol/sdk": "^1.26.0",
"@speakeasy-api/docs-mcp-core": "workspace:*",
"change-case": "^5.4.4",
"commander": "^13.1.0",
"zod": "^4.3.6"
}
Expand Down
1 change: 1 addition & 0 deletions packages/server/src/http.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ function createMcpServer(
resources: resources.map((r) => ({
uri: r.uri,
name: r.name,
title: r.title,
description: r.description,
mimeType: r.mimeType,
})),
Expand Down
Loading