Skip to content

Add check that page title is in sync with ToC, h1, and metadata #3669

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"test": "playwright test",
"typecheck": "tsc",
"check": "npm run check:patterns-index && npm run check:qiskit-bot && npm run check:metadata && npm run check:images && npm run check:orphan-pages && npm run check:spelling && npm run check:internal-links",
"check:images": "tsx scripts/js/commands/checkImages.ts",
"check:images": "tsx scripts/js/commands/checkMarkdown.ts",
"check:metadata": "tsx scripts/js/commands/checkMetadata.ts",
"check:spelling": "tsx scripts/js/commands/checkSpelling.ts",
"check:fmt": "prettier --check .",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
import { globby } from "globby";
import yargs from "yargs/yargs";
import { hideBin } from "yargs/helpers";

import { collectInvalidImageErrors } from "../lib/markdownImages.js";
import { readMarkdown } from "../lib/markdownReader.js";
import { collectHeadingTitleMismatch } from "../lib/markdownTitles.js";

interface Arguments {
[x: string]: unknown;
Expand All @@ -35,29 +37,53 @@ const readArgs = (): Arguments => {

/**
 * Entry point of the Markdown check command.
 *
 * Reads every file selected by `determineContentFiles`, runs the image check
 * and the title/heading check on its Markdown, and gathers one combined,
 * category-prefixed error list per file. When at least one file has errors,
 * the per-file reports are printed, a help message is written to stderr, and
 * the process exits with status 1. Otherwise a success message is printed.
 */
async function main(): Promise<void> {
  const args = readArgs();
  const files = await determineContentFiles(args);
  const fileErrors: string[] = [];

  for (const file of files) {
    const markdown = await readMarkdown(file);
    const imageErrors = await collectInvalidImageErrors(markdown);
    const mismatchedTitleHeadingErrors =
      await collectHeadingTitleMismatch(markdown);

    // Prefix each message with its category so a single per-file list can
    // carry the results of both checks.
    const errorsInFile: string[] = [
      ...[...imageErrors].map((err) => `Image error: ${err}`),
      ...mismatchedTitleHeadingErrors.map(
        (err) => `Title/Heading mismatch: ${err}`,
      ),
    ];

    // Exactly one report entry per failing file, built from the combined
    // list (not from the image errors alone).
    if (errorsInFile.length) {
      fileErrors.push(
        `Error in file '${file}':\n\t- ${errorsInFile.join("\n\t- ")}\n`,
      );
    }
  }

  // Final error report
  if (fileErrors.length) {
    fileErrors.forEach((error) => console.log(error));
    console.error(
      "Some issues were found in your Markdown files. Please fix them before proceeding.\n" +
        "Image help: https://github.com/Qiskit/documentation#images\n" +
        "Title/Heading help: https://github.com/Qiskit/documentation#titles-and-headings\n",
    );
    process.exit(1);
  }

  console.log("All files passed validation.\n");
}

async function determineContentFiles(args: Arguments): Promise<string[]> {
Expand Down
64 changes: 64 additions & 0 deletions scripts/js/lib/markdownTitles.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// This code is a Qiskit project.
//
// (C) Copyright IBM 2023.
//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.

import { expect, test } from "@playwright/test";

import { collectHeadingTitleMismatch } from "./markdownTitles";

// A document whose frontmatter title is identical to its level-1 heading
// must produce an empty list of mismatches.
test("Test for matching titles and headings", async () => {
  const source = `---
title: My Awesome Guide
---

# My Awesome Guide
`;

  const found = await collectHeadingTitleMismatch(source);

  expect(found).toEqual([]);
});

// A plain-text heading that differs from the frontmatter title must be
// reported with both values quoted in the message. Extra frontmatter keys
// (here `author`) are present alongside the title.
test("Test to find mismatched titles and headings", async () => {
  const source = `---
title: Qiskit Doc
author: John
---

# Introduction

This guide will walk you through everything.`;

  const expected: string[] = [
    `Mismatch: frontmatter title "Qiskit Doc" does not match heading "Introduction"`,
  ];

  const found = await collectHeadingTitleMismatch(source);

  expect(found).toEqual(expected);
});

// A heading containing inline markup (emphasis) is reported by its visible
// text only: the markup characters do not appear in the message.
test("Test to mismatched and complex titles and headings", async () => {
  const source = `---
title: My Awesome Guide
---

# This is a *Heading*

This guide will walk you through everything.`;

  const expected: string[] = [
    `Mismatch: frontmatter title "My Awesome Guide" does not match heading "This is a Heading"`,
  ];

  const found = await collectHeadingTitleMismatch(source);

  expect(found).toEqual(expected);
});
64 changes: 64 additions & 0 deletions scripts/js/lib/markdownTitles.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { unified } from "unified";
import remarkParse from "remark-parse";
import remarkGfm from "remark-gfm";
import remarkFrontmatter from "remark-frontmatter";
import { visit } from "unist-util-visit";
import { Root } from "mdast";
import yaml from "js-yaml";

/** Minimal structural view of a syntax-tree node, as read by `extractText`. */
interface TextBearingNode {
  type: string;
  value?: string;
  children?: TextBearingNode[];
}

/**
 * Recursively extracts the visible text of a node.
 *
 * `text` and `inlineCode` nodes contribute their `value`; any other node
 * contributes the concatenation of its children's text, and a node with
 * neither contributes the empty string.
 *
 * Children are concatenated without a separator: the text nodes themselves
 * already carry whatever whitespace surrounds inline markup, so inserting a
 * space between siblings would turn "This is a *Heading*" into
 * "This is a  Heading" (double space) and "*world*!" into "world !".
 *
 * @param node - The node to read text from.
 * @returns The visible text of `node`, untrimmed.
 */
function extractText(node: TextBearingNode): string {
  if (node.type === "text" || node.type === "inlineCode") {
    return node.value ?? "";
  }

  if (Array.isArray(node.children)) {
    return node.children.map((child) => extractText(child)).join("");
  }

  return "";
}

/**
 * Checks that the frontmatter `title` of a Markdown document matches its
 * first non-empty level-1 heading.
 *
 * The document is parsed with remark (GFM and YAML frontmatter enabled). The
 * heading is reduced to its visible text via `extractText` and trimmed before
 * the comparison. When either the title or the heading is missing or empty,
 * nothing is compared and no mismatch is reported.
 *
 * Any exception raised by `yaml.load` while reading the frontmatter
 * propagates to the caller.
 *
 * @param markdown - Full Markdown source, including any frontmatter.
 * @returns An empty array when there is nothing to report, otherwise a
 *   single-element array holding the mismatch message.
 */
export async function collectHeadingTitleMismatch(
  markdown: string,
): Promise<string[]> {
  const processor = unified()
    .use(remarkParse)
    .use(remarkGfm)
    .use(remarkFrontmatter, ["yaml"]);

  const tree = processor.parse(markdown);

  // Extract frontmatter title
  let frontmatterTitle: string | undefined;
  visit(tree, "yaml", (node: any) => {
    // NOTE(review): whether `load` is safe on untrusted input depends on the
    // installed js-yaml major version — confirm against package.json.
    const data: unknown = yaml.load(node.value);
    if (typeof data !== "object" || data === null || !("title" in data)) {
      return;
    }

    const title = (data as { title?: unknown }).title;
    if (title === undefined || title === null) {
      return;
    }

    // YAML types unquoted scalars (`title: 2024` parses as a number). Compare
    // by string form so such a title can match the heading text "2024"
    // instead of always failing a number-vs-string strict comparison.
    frontmatterTitle = typeof title === "string" ? title : String(title);
  });

  // Extract the first level-1 heading that has visible text
  let headingText: string | undefined;
  visit(tree, "heading", (node: any) => {
    if (node.depth === 1 && !headingText) {
      headingText = extractText(node).trim();
    }
  });

  // Compare and report; at most one message is ever produced
  if (frontmatterTitle && headingText && frontmatterTitle !== headingText) {
    return [
      `Mismatch: frontmatter title "${frontmatterTitle}" does not match heading "${headingText}"`,
    ];
  }

  return [];
}
Loading