Skip to content

Add check that page title is in sync with ToC, h1, and metadata #3669

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/api-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,5 @@ jobs:
run: npm run check:orphan-pages -- --apis
- name: Check metadata
run: npm run check:metadata -- --apis
- name: Check images
run: npm run check:images -- --apis
- name: Check markdown
run: npm run check:markdown -- --apis
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ jobs:

- name: File metadata
run: npm run check:metadata
- name: Check images
run: npm run check:images
- name: Check markdown
run: npm run check:markdown
- name: Spellcheck
run: npm run check:spelling
- name: Check Qiskit bot config
Expand Down
2 changes: 1 addition & 1 deletion check
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ CHECKS = {
"metadata": ["npm", "run", "check:metadata"],
"patterns index pages": ["npm", "run", "check:patterns-index"],
"tutorials index page": ["python3", "scripts/ci/check-tutorials-index.py"],
"images": ["npm", "run", "check:images"],
"images": ["npm", "run", "check:markdown"],
"orphan pages": ["npm", "run", "check:orphan-pages"],
"spelling": ["npm", "run", "check:spelling"],
"internal links": ["npm", "run", "check:internal-links"],
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"test": "playwright test",
"typecheck": "tsc",
"check": "npm run check:patterns-index && npm run check:qiskit-bot && npm run check:metadata && npm run check:images && npm run check:orphan-pages && npm run check:spelling && npm run check:internal-links",
"check:images": "tsx scripts/js/commands/checkImages.ts",
"check:markdown": "tsx scripts/js/commands/checkMarkdown.ts",
"check:metadata": "tsx scripts/js/commands/checkMetadata.ts",
"check:spelling": "tsx scripts/js/commands/checkSpelling.ts",
"check:fmt": "prettier --check .",
Expand Down
82 changes: 0 additions & 82 deletions scripts/js/commands/checkImages.ts

This file was deleted.

146 changes: 146 additions & 0 deletions scripts/js/commands/checkMarkdown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
// This code is a Qiskit project.
//
// (C) Copyright IBM 2024.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// (C) Copyright IBM 2024.
// (C) Copyright IBM 2025.

//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.

import { globby } from "globby";
import yargs from "yargs/yargs";
import { hideBin } from "yargs/helpers";

import { collectInvalidImageErrors } from "../lib/markdownImages.js";
import { readMarkdown } from "../lib/markdownReader.js";
import { collectHeadingTitleMismatch } from "../lib/markdownTitles.js";

const IGNORE_TITLE_MISMATCHES: string[] = [
"docs/migration-guides/external-providers-primitives-v2.mdx",
"docs/migration-guides/local-simulators.mdx",
"docs/migration-guides/metapackage-migration.mdx",
"docs/migration-guides/qiskit-1.0-features.mdx",
"docs/migration-guides/qiskit-1.0-installation.mdx",
"docs/migration-guides/qiskit-algorithms-module.mdx",
"docs/migration-guides/qiskit-backend-primitives.mdx",
"docs/migration-guides/qiskit-backendv1-to-v2.mdx",
"docs/migration-guides/qiskit-opflow-module.mdx",
"docs/migration-guides/qiskit-runtime-from-ibm-provider.mdx",
"docs/migration-guides/qiskit-runtime-from-ibmq-provider.mdx",
"docs/migration-guides/qiskit-runtime-options.mdx",
"docs/guides/access-groups.mdx",
"docs/migration-guides/v2-primitives.mdx",
"docs/guides/execution-modes.mdx",
"docs/guides/install-qiskit-source.mdx",
"docs/guides/manage-cost.mdx",
"docs/guides/plans-overview.mdx",
"docs/guides/qiskit-addons-aqc.mdx",
"docs/guides/qiskit-addons-sqd.mdx",
"docs/guides/qiskit-code-assistant-vscode.mdx",
"docs/guides/qiskit-function-templates.mdx",
"docs/guides/serverless.mdx",
"docs/open-source/code-of-conduct.mdx",
"docs/open-source/create-a-provider.mdx",
"docs/support/execution-modes-faq.mdx",
"docs/support/faq.mdx",
"learning/index.mdx",
"learning/courses/basics-of-quantum-information/exam.mdx",
"learning/courses/basics-of-quantum-information/index.mdx",
"learning/courses/foundations-of-quantum-error-correction/index.mdx",
"learning/courses/fundamentals-of-quantum-algorithms/exam.mdx",
"learning/courses/fundamentals-of-quantum-algorithms/index.mdx",
"learning/courses/quantum-business-foundations/business-impacts.mdx",
"learning/courses/quantum-business-foundations/exam.mdx",
"learning/courses/quantum-business-foundations/quantum-computing-fundamentals.mdx",
"learning/courses/quantum-business-foundations/quantum-technology.mdx",
"learning/courses/quantum-business-foundations/start-your-quantum-journey.mdx",
"learning/courses/quantum-chem-with-vqe/exam.mdx",
"learning/courses/quantum-diagonalization-algorithms/exam.mdx",
"learning/courses/quantum-machine-learning/exam.mdx",
"learning/courses/quantum-safe-cryptography/exam.mdx",
"learning/courses/utility-scale-quantum-computing/classical-simulation.mdx",
"learning/courses/variational-algorithm-design/exam.mdx",
];

const allErrors: string[] = [];

interface Arguments {
[x: string]: unknown;
apis: boolean;
}

const readArgs = (): Arguments => {
return yargs(hideBin(process.argv))
.version(false)
.option("apis", {
type: "boolean",
default: false,
description:
"Check the images in the current and dev versions of the API docs have alt text.",
})
.parseSync();
};

async function main() {
const args = readArgs();
const files = await determineContentFiles(args);

for (const file of files) {
const markdown = await readMarkdown(file);
const imageErrors = await collectInvalidImageErrors(markdown);
const mismatchedTitleHeadingErrors =
IGNORE_TITLE_MISMATCHES.includes(file) || file.startsWith("docs/api")
? new Set<string>()
: await collectHeadingTitleMismatch(markdown);

//Collect all errors for this file
const errorsInFile: string[] = [
...imageErrors,
...mismatchedTitleHeadingErrors,
];

if (errorsInFile.length) {
allErrors.push(
// `Error in file '${file}':\n\t- ${[...imageErrors].join("\n\t- ")}\n`,
`Error in file '${file}':\n\t- ${errorsInFile.join("\n\t- ")}\n`,
);
}
}

// Final error report
if (allErrors.length) {
allErrors.forEach((error) => console.log(error));
console.error(
"Some issues were found in your Markdown files. Please fix them before proceeding.\n" +
"Image help: https://github.com/Qiskit/documentation#images\n" +
"Title/Heading help: https://github.com/Qiskit/documentation#titles-and-headings\n",
);
process.exit(1);
}

console.log("All files passed validation.\n");
}

async function determineContentFiles(args: Arguments): Promise<string[]> {
// We always skip historical versions to simplify the code and to have a faster script.
// Even though it is possible for someone to add a new image without alt text to a
// historical version that wasn't in the original release, that's very unlikely.
// If it happens, it would probably be a backport from latest or dev, and the linter in
// the API repo should catch it.
//
// If an image is missed by the API repo's linter, it will still have an alt text defined,
// although it won't be very useful. That's because Sphinx auto-generates alt text.
const globs = [
"{docs,learning}/**/*.{ipynb,mdx}",
args.apis ? "!docs/api/*/([0-9]*)/*.mdx" : "!docs/api/**/*.mdx",
// Remove when https://github.com/Qiskit/documentation/issues/2564 is fixed
`!docs/api/qiskit/release-notes/*.mdx`,
];

return await globby(globs);
}

main().then(() => process.exit());
64 changes: 64 additions & 0 deletions scripts/js/lib/markdownTitles.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// This code is a Qiskit project.
//
// (C) Copyright IBM 2023.
//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.

import { expect, test } from "@playwright/test";

import { collectHeadingTitleMismatch } from "./markdownTitles";

test("Test for matching titles and headings", async () => {
const markdown1 = `---
title: My Awesome Guide
---

# My Awesome Guide
`;
const mismatched = await collectHeadingTitleMismatch(markdown1);
const result: Set<string> = new Set();
expect(mismatched).toEqual(result);
});

test("Test to find mismatched titles and headings", async () => {
const markdown2 = `---
title: Qiskit Doc
author: John
---

# Introduction

This guide will walk you through everything.`;

const mismatched2 = await collectHeadingTitleMismatch(markdown2);

const result2: Set<string> = new Set([
`Mismatch: frontmatter title "Qiskit Doc" does not match heading "Introduction"`,
]);

expect(mismatched2).toEqual(result2);
});

test("Test to mismatched and complex titles and headings", async () => {
const markdown2 = `---
title: My Awesome Guide
---

# This is a *Heading*

This guide will walk you through everything.`;

const mismatched2 = await collectHeadingTitleMismatch(markdown2);

const result2: Set<string> = new Set([
`Mismatch: frontmatter title "My Awesome Guide" does not match heading "This is a Heading"`,
]);

expect(mismatched2).toEqual(result2);
});
66 changes: 66 additions & 0 deletions scripts/js/lib/markdownTitles.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import { unified } from "unified";
import remarkParse from "remark-parse";
import remarkGfm from "remark-gfm";
import remarkFrontmatter from "remark-frontmatter";
import { visit, EXIT } from "unist-util-visit";
import { Root } from "mdast";
import yaml from "js-yaml";

// Helper to recursively extract visible text from heading node
function extractText(node: any): string {
if (node.type === "text" || node.type === "inlineCode") {
return node.value;
}

if (node.type === "link" && node.children) {
return node.children.map(extractText).join(" ");
}

if (node.children && Array.isArray(node.children)) {
return node.children.map(extractText).join(" ");
}

return "";
}

export async function collectHeadingTitleMismatch(
markdown: string,
): Promise<Set<string>> {
const mismatches = new Set<string>();

let frontmatterTitle: string | undefined;
let headingText: string | undefined;

const processor = unified()
.use(remarkParse)
.use(remarkGfm)
.use(remarkFrontmatter, ["yaml"]);

const tree = processor.parse(markdown);

// Extract frontmatter title
visit(tree, "yaml", (node: any) => {
const data = yaml.load(node.value);
if (typeof data === "object" && data !== null && "title" in data) {
frontmatterTitle = (data as any).title;
return EXIT;
}
});

// Extract first level-1 heading with full formatting
visit(tree, "heading", (node: any) => {
if (node.depth === 1 && !headingText) {
headingText = extractText(node).trim();
return EXIT;
}
});

// Compare and collect mismatch
if (frontmatterTitle && headingText && frontmatterTitle !== headingText) {
mismatches.add(
`Mismatch: frontmatter title "${frontmatterTitle}" does not match heading "${headingText}"`,
);
}

return mismatches;
}