Skip to content

Add check that page title is in sync with ToC, h1, and metadata #3669

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/api-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,5 @@ jobs:
run: npm run check:orphan-pages -- --apis
- name: Check metadata
run: npm run check:metadata -- --apis
- name: Check images
run: npm run check:images -- --apis
- name: Check markdown
run: npm run check:markdown -- --apis
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ jobs:

- name: File metadata
run: npm run check:metadata
- name: Check images
run: npm run check:images
- name: Check markdown
run: npm run check:markdown
- name: Spellcheck
run: npm run check:spelling
- name: Check Qiskit bot config
Expand Down
2 changes: 1 addition & 1 deletion check
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ CHECKS = {
"metadata": ["npm", "run", "check:metadata"],
"patterns index pages": ["npm", "run", "check:patterns-index"],
"tutorials index page": ["python3", "scripts/ci/check-tutorials-index.py"],
"images": ["npm", "run", "check:images"],
"images": ["npm", "run", "check:markdown"],
"orphan pages": ["npm", "run", "check:orphan-pages"],
"spelling": ["npm", "run", "check:spelling"],
"internal links": ["npm", "run", "check:internal-links"],
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"test": "playwright test",
"typecheck": "tsc",
"check": "npm run check:patterns-index && npm run check:qiskit-bot && npm run check:metadata && npm run check:images && npm run check:orphan-pages && npm run check:spelling && npm run check:internal-links",
"check:images": "tsx scripts/js/commands/checkImages.ts",
"check:markdown": "tsx scripts/js/commands/checkMarkdown.ts",
"check:metadata": "tsx scripts/js/commands/checkMetadata.ts",
"check:spelling": "tsx scripts/js/commands/checkSpelling.ts",
"check:fmt": "prettier --check .",
Expand Down
82 changes: 0 additions & 82 deletions scripts/js/commands/checkImages.ts

This file was deleted.

147 changes: 147 additions & 0 deletions scripts/js/commands/checkMarkdown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
// This code is a Qiskit project.
//
// (C) Copyright IBM 2024.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// (C) Copyright IBM 2024.
// (C) Copyright IBM 2025.

//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.

import { globby } from "globby";
import yargs from "yargs/yargs";
import { hideBin } from "yargs/helpers";

import { collectInvalidImageErrors } from "../lib/markdownImages.js";
import { readMarkdown } from "../lib/markdownReader.js";
import { collectHeadingTitleMismatch } from "../lib/markdownTitles.js";
import { parseMarkdown } from "../lib/markdownUtils";

const IGNORE_TITLE_MISMATCHES: string[] = [
"docs/migration-guides/external-providers-primitives-v2.mdx",
"docs/migration-guides/local-simulators.mdx",
"docs/migration-guides/metapackage-migration.mdx",
"docs/migration-guides/qiskit-1.0-features.mdx",
"docs/migration-guides/qiskit-1.0-installation.mdx",
"docs/migration-guides/qiskit-algorithms-module.mdx",
"docs/migration-guides/qiskit-backend-primitives.mdx",
"docs/migration-guides/qiskit-backendv1-to-v2.mdx",
"docs/migration-guides/qiskit-opflow-module.mdx",
"docs/migration-guides/qiskit-runtime-from-ibm-provider.mdx",
"docs/migration-guides/qiskit-runtime-from-ibmq-provider.mdx",
"docs/migration-guides/qiskit-runtime-options.mdx",
"docs/guides/access-groups.mdx",
"docs/migration-guides/v2-primitives.mdx",
"docs/guides/execution-modes.mdx",
"docs/guides/install-qiskit-source.mdx",
"docs/guides/manage-cost.mdx",
"docs/guides/plans-overview.mdx",
"docs/guides/qiskit-addons-aqc.mdx",
"docs/guides/qiskit-addons-sqd.mdx",
"docs/guides/qiskit-code-assistant-vscode.mdx",
"docs/guides/qiskit-function-templates.mdx",
"docs/guides/serverless.mdx",
"docs/open-source/code-of-conduct.mdx",
"docs/open-source/create-a-provider.mdx",
"docs/support/execution-modes-faq.mdx",
"docs/support/faq.mdx",
"learning/index.mdx",
"learning/courses/basics-of-quantum-information/exam.mdx",
"learning/courses/basics-of-quantum-information/index.mdx",
"learning/courses/foundations-of-quantum-error-correction/index.mdx",
"learning/courses/fundamentals-of-quantum-algorithms/exam.mdx",
"learning/courses/fundamentals-of-quantum-algorithms/index.mdx",
"learning/courses/quantum-business-foundations/business-impacts.mdx",
"learning/courses/quantum-business-foundations/exam.mdx",
"learning/courses/quantum-business-foundations/quantum-computing-fundamentals.mdx",
"learning/courses/quantum-business-foundations/quantum-technology.mdx",
"learning/courses/quantum-business-foundations/start-your-quantum-journey.mdx",
"learning/courses/quantum-chem-with-vqe/exam.mdx",
"learning/courses/quantum-diagonalization-algorithms/exam.mdx",
"learning/courses/quantum-machine-learning/exam.mdx",
"learning/courses/quantum-safe-cryptography/exam.mdx",
"learning/courses/utility-scale-quantum-computing/classical-simulation.mdx",
"learning/courses/variational-algorithm-design/exam.mdx",
];

const allErrors: string[] = [];

interface Arguments {
[x: string]: unknown;
apis: boolean;
}

const readArgs = (): Arguments => {
return yargs(hideBin(process.argv))
.version(false)
.option("apis", {
type: "boolean",
default: false,
description:
"Check files in the current and dev versions of the API docs have matching titles and metadata.",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that we check both titles and images, we shouldn't be overly specific

Suggested change
"Check files in the current and dev versions of the API docs have matching titles and metadata.",
"Check files in the current and dev versions of the API docs.",

})
.parseSync();
};

async function main() {
const args = readArgs();
const files = await determineContentFiles(args);

for (const file of files) {
const markdown = await readMarkdown(file);
const tree = parseMarkdown(markdown);
const imageErrors = await collectInvalidImageErrors(tree);
const mismatchedTitleHeadingErrors =
IGNORE_TITLE_MISMATCHES.includes(file) || file.startsWith("docs/api")
? new Set<string>()
: await collectHeadingTitleMismatch(tree);

//Collect all errors for this file
const errorsInFile: string[] = [
...imageErrors,
...mismatchedTitleHeadingErrors,
];

if (errorsInFile.length) {
allErrors.push(
`Error in file '${file}':\n\t- ${errorsInFile.join("\n\t- ")}\n`,
);
}
}

// Final error report
if (allErrors.length) {
allErrors.forEach((error) => console.log(error));
console.error(
"💔 Some issues were found in your Markdown files. Please fix them before proceeding.\n" +
"Image help: https://github.com/Qiskit/documentation#images\n" +
"Title/Heading help: https://github.com/Qiskit/documentation#titles-and-headings\n",
);
process.exit(1);
}

console.log("✅ All files passed validation.\n");
}

async function determineContentFiles(args: Arguments): Promise<string[]> {
// We always skip historical versions to simplify the code and to have a faster script.
// Even though it is possible for someone to add a new image without alt text to a
// historical version that wasn't in the original release, that's very unlikely.
// If it happens, it would probably be a backport from latest or dev, and the linter in
// the API repo should catch it.
//
// If an image is missed by the API repo's linter, it will still have an alt text defined,
// although it won't be very useful. That's because Sphinx auto-generates alt text.
const globs = [
"{docs,learning}/**/*.{ipynb,mdx}",
args.apis ? "!docs/api/*/([0-9]*)/*.mdx" : "!docs/api/**/*.mdx",
// Remove when https://github.com/Qiskit/documentation/issues/2564 is fixed
`!docs/api/qiskit/release-notes/*.mdx`,
];

return await globby(globs);
}

main().then(() => process.exit());
19 changes: 18 additions & 1 deletion scripts/js/lib/markdownImages.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,19 @@
// that they have been altered from the originals.

import { expect, test } from "@playwright/test";
import { unified } from "unified";
import remarkParse from "remark-parse";
import remarkGfm from "remark-gfm";
import remarkFrontmatter from "remark-frontmatter";
import { Root } from "mdast";

function parseMarkdown(markdown: string): Root {
return unified()
.use(remarkParse)
.use(remarkGfm)
.use(remarkFrontmatter, ["yaml"])
.parse(markdown);
}
Comment on lines +20 to +26
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a nice helper function, what do you think about moving it to a new file (e.g. scripts/js/lib/markdownUtils.ts) and re-using it in checkMarkdown.ts?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment still seems relevant


import { collectInvalidImageErrors } from "./markdownImages.js";

Expand Down Expand Up @@ -45,8 +58,12 @@ test("Test the finding of invalid images", async () => {
![And a valid SVG](/learning/images/valid.svg)

<img src="/learning/images/HTMLexample1.jpg" alt="" width="200"/>

`;
const images = await collectInvalidImageErrors(markdown);

const tree = parseMarkdown(markdown);
const images = await collectInvalidImageErrors(tree);

const correct_images = new Set([
"Convert 'img1.png' to AVIF. You can use the command `magick <path/to/image>.png <path/to/image>.avif`. If ImageMagick isn't preinstalled, you can get it from https://imagemagick.org/script/download.php. Then delete the old file and update the markdown to point to the new file.",
"Convert 'img2.png' to AVIF. You can use the command `magick <path/to/image>.png <path/to/image>.avif`. If ImageMagick isn't preinstalled, you can get it from https://imagemagick.org/script/download.php. Then delete the old file and update the markdown to point to the new file.",
Expand Down
68 changes: 25 additions & 43 deletions scripts/js/lib/markdownImages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,55 +11,37 @@
// that they have been altered from the originals.

import { load } from "cheerio";
import { unified } from "unified";
import { Root } from "remark-mdx";
import { Root } from "mdast";
import { visit } from "unist-util-visit";
import remarkParse from "remark-parse";
import remarkGfm from "remark-gfm";
import remarkStringify from "remark-stringify";
import { last, split } from "lodash-es";

export async function collectInvalidImageErrors(
markdown: string,
): Promise<Set<string>> {
export function collectInvalidImageErrors(tree: Root): Set<string> {
const imagesErrors = new Set<string>();

await unified()
.use(remarkParse)
.use(remarkGfm)
.use(() => (tree: Root) => {
visit(tree, "image", (node) => {
// Sphinx uses the image path as alt text if it wasn't defined using the
// :alt: option.
const imageName = last(split(node.url, "/"));
if (!node.alt || node.alt.endsWith(imageName!)) {
imagesErrors.add(`The image '${node.url}' does not have alt text.`);
}
visit(tree, "image", (node) => {
const imageName = last(split(node.url, "/"));
if (!node.alt || node.alt.endsWith(imageName!)) {
imagesErrors.add(`The image '${node.url}' does not have alt text.`);
}

// Ask to convert PNG and JPEG to AVIF
if (node.url.match(/\.(png|jpe?g)$/)) {
const urlWithAvifExtension = node.url.replace(
/\.(png|jpe?g)$/,
".avif",
);
imagesErrors.add(
`Convert '${imageName}' to AVIF. You can use the command \`magick <path/to/image>.png <path/to/image>.avif\`. ` +
`If ImageMagick isn't preinstalled, you can get it from https://imagemagick.org/script/download.php. ` +
`Then delete the old file and update the markdown to point to the new file.`,
);
}
});
visit(tree, "html", (node) => {
const $ = load(node.value);
if ($("img").length) {
imagesErrors.add(
`The image '${$("img").attr("src")}' uses an HTML <img> tag instead of markdown syntax.`,
);
}
});
})
.use(remarkStringify)
.process(markdown);
if (node.url.match(/\.(png|jpe?g)$/)) {
const urlWithAvifExtension = node.url.replace(/\.(png|jpe?g)$/, ".avif");
imagesErrors.add(
`Convert '${imageName}' to AVIF. You can use the command \`magick <path/to/image>.png <path/to/image>.avif\`. ` +
`If ImageMagick isn't preinstalled, you can get it from https://imagemagick.org/script/download.php. ` +
`Then delete the old file and update the markdown to point to the new file.`,
);
}
});

visit(tree, "html", (node) => {
const $ = load(node.value);
if ($("img").length) {
imagesErrors.add(
`The image '${$("img").attr("src")}' uses an HTML <img> tag instead of markdown syntax.`,
);
}
});

return imagesErrors;
}
Loading