From 6268df2fb10c50c6437ef951a1aa6dc3569efbe4 Mon Sep 17 00:00:00 2001 From: Francois Daoust Date: Mon, 24 Mar 2025 11:57:16 +0100 Subject: [PATCH 1/6] Add "freeze patch" mechanism, add one for filter-effects-1 This creates a new way to curate the results of a crawl: a freeze patch allows to freeze the results of a crawl to a specific commit ID in Webref. This is meant for specs that are temporarily broken beyond repair. First such patch is for the filter-effects-1 spec. Each freeze patch is implemented as a JSON file named after the spec's shortname and that contains the commit ID and a `pending` key that links to the issue that tracks the problem in some GitHub repository. The `clean-patches` script proposes to drop the patch when the issue gets closed. --- ed/freezepatches/README.md | 8 +++ ed/freezepatches/filter-effects-1.json | 4 ++ tools/apply-patches.js | 79 +++++++++++++++++++++++++- tools/clean-patches.js | 13 ++++- 4 files changed, 100 insertions(+), 4 deletions(-) create mode 100644 ed/freezepatches/README.md create mode 100644 ed/freezepatches/filter-effects-1.json diff --git a/ed/freezepatches/README.md b/ed/freezepatches/README.md new file mode 100644 index 000000000000..1deabd84c928 --- /dev/null +++ b/ed/freezepatches/README.md @@ -0,0 +1,8 @@ +# Freeze patches + +These are patches applied to specs to freeze all the extracts for that spec to a past result, identified by a commit ID. + +Each patch should be a JSON file named after the spec's shortname that defines a JSON object with two keys: + +- `commit`: The full commit ID in Webref that identifies the crawl results to use for the spec. +- `pending`: The URL of an issue that tracks the problem. This allows to detect when the patch is no longer needed. 
diff --git a/ed/freezepatches/filter-effects-1.json b/ed/freezepatches/filter-effects-1.json new file mode 100644 index 000000000000..829f88cd97d8 --- /dev/null +++ b/ed/freezepatches/filter-effects-1.json @@ -0,0 +1,4 @@ +{ + "commit": "40defbb2afaad2bc98fca473f7dbea844eb4020e", + "pending": "https://github.com/w3c/fxtf-drafts/issues/591" +} diff --git a/tools/apply-patches.js b/tools/apply-patches.js index 8d1b0fa55043..5e57b6db1f93 100644 --- a/tools/apply-patches.js +++ b/tools/apply-patches.js @@ -19,7 +19,7 @@ import path from 'node:path'; import util from 'node:util'; import { fileURLToPath } from 'node:url'; import { execFile as execCb } from 'node:child_process'; -import { createFolderIfNeeded } from './utils.js'; +import { createFolderIfNeeded, loadJSON } from './utils.js'; const execFile = util.promisify(execCb); async function applyPatches(rawFolder, outputFolder, type) { @@ -61,6 +61,7 @@ async function applyPatches(rawFolder, outputFolder, type) { ]; await createFolderIfNeeded(outputFolder); + await applyFreezePatches(rawFolder, outputFolder); for (const { name, srcDir, dstDir, patchDir, fileExt } of packages) { if (!type.includes(name)) { @@ -100,6 +101,82 @@ async function applyPatches(rawFolder, outputFolder, type) { } +/** + * Apply "freeze" patches, which freeze curation data for a spec to the results + * of a previous crawl result, identified by a commit ID. + * + * Freeze patches are meant to be used for specs that are (hopefully + * temporarily) severely broken. 
+ */ +async function applyFreezePatches(rawFolder, outputFolder) { + const patchDir = path.join(rawFolder, 'freezepatches'); + const patchFiles = await fs.readdir(patchDir); + + const outputIndex = await loadJSON(path.join(outputFolder, 'index.json')); + let patchApplied = false; + + for (const file of patchFiles) { + if (!file.endsWith('.json')) { + continue; + } + + const shortname = file.replace(/\.json$/, ''); + const branchName = `freezepatch-${shortname}`; + const patch = path.join(patchDir, file); + const json = await loadJSON(patch); + + console.log(`Applying ${path.relative(rawFolder, patch)}`); + const outputSpecPos = outputIndex.results.findIndex(spec => spec.shortname === shortname); + + // Get back to the patch commit + // (note this does not touch the `curated` folder because it is in + // the `.gitignore` file) + await execFile('git', ['checkout', '-B', branchName, json.commit]); + + const crawlIndex = await loadJSON(path.join(rawFolder, 'index.json')); + const crawlSpec = crawlIndex.results.find(spec => spec.shortname === shortname); + + for (const [extractType, extractFile] of Object.entries(crawlSpec)) { + if (extractType === 'cddl') { + // Handle CDDL extracts separately, it's an array of extracts + for (const { file: cddlFile } of extractFile) { + await fs.copyFile( + path.join(rawFolder, cddlFile), + path.join(outputFolder, cddlFile) + ); + } + } + else if (!extractFile || + (typeof extractFile !== 'string') || + !extractFile.match(/^[^\/]+\/[^\/]+\.(json|idl)$/)) { + // Skip properties that do not link to an extract + continue; + } + else { + await fs.copyFile( + path.join(rawFolder, extractFile), + path.join(outputFolder, extractFile) + ); + } + outputIndex.results.splice(outputSpecPos, 1, crawlSpec); + } + + await execFile('git', ['checkout', 'main']); + await execFile('git', ['branch', '-D', branchName]); + patchApplied = true; + } + + // Update curated version of the index.json file + if (patchApplied) { + await fs.writeFile( + 
path.join(outputFolder, 'index.json'), + JSON.stringify(outputIndex, null, 2), + 'utf8' + ); + } +} + + /************************************************** Export methods for use as module **************************************************/ diff --git a/tools/clean-patches.js b/tools/clean-patches.js index 018d633ced9b..606529bb1770 100644 --- a/tools/clean-patches.js +++ b/tools/clean-patches.js @@ -30,7 +30,7 @@ async function dropPatchesWhenPossible() { if (subDir.endsWith("patches")) { const files = fs.readdirSync(path.join(rootDir, subDir)); for (const file of files) { - if (file.endsWith(".patch")) { + if (file.endsWith(".patch") || file.endsWith(".json")) { const patch = path.join(subDir, file); console.log(`- add "${patch}"`); patches.push({ name: patch }); @@ -44,9 +44,16 @@ async function dropPatchesWhenPossible() { const diffStart = /^---$/m; const issueUrl = /(?<=^|\s)https:\/\/github\.com\/([^\/]+)\/([^\/]+)\/(issues|pull)\/(\d+)(?=\s|$)/g; for (const patch of patches) { + let patchIssues; const contents = fs.readFileSync(path.join(rootDir, patch.name), "utf8"); - const desc = contents.substring(0, contents.match(diffStart)?.index); - const patchIssues = [...desc.matchAll(issueUrl)]; + if (patch.name.endsWith(".json")) { + const json = JSON.parse(contents); + patchIssues = [...(json.pending ?? 
'').matchAll(issueUrl)]; + } + else { + const desc = contents.substring(0, contents.match(diffStart)?.index); + patchIssues = [...desc.matchAll(issueUrl)]; + } for (const patchIssue of patchIssues) { if (!patch.issues) { patch.issues = []; From e88e0e683f38dd60fa1651857e8b0a35597b0e77 Mon Sep 17 00:00:00 2001 From: Francois Daoust Date: Mon, 24 Mar 2025 12:15:12 +0100 Subject: [PATCH 2/6] Amend test job to checkout all the history --- .github/workflows/test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ffc09c7bc27a..29f75b8d4a31 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,6 +7,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + # Need to checkout all history for curation job + fetch-depth: 0 - uses: actions/setup-node@v4 with: node-version: 20 From b850ee37d633318f4c3bc648b0c382e0a9a7a850 Mon Sep 17 00:00:00 2001 From: Francois Daoust Date: Mon, 24 Mar 2025 12:29:21 +0100 Subject: [PATCH 3/6] Use a more recent commit ID! --- ed/freezepatches/filter-effects-1.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ed/freezepatches/filter-effects-1.json b/ed/freezepatches/filter-effects-1.json index 829f88cd97d8..31bd11249e45 100644 --- a/ed/freezepatches/filter-effects-1.json +++ b/ed/freezepatches/filter-effects-1.json @@ -1,4 +1,4 @@ { - "commit": "40defbb2afaad2bc98fca473f7dbea844eb4020e", + "commit": "647faa1643647b66b31a505e5b247a8695955352", "pending": "https://github.com/w3c/fxtf-drafts/issues/591" } From a30b785997818bf3d66ce0bea82289f414418eaf Mon Sep 17 00:00:00 2001 From: Francois Daoust Date: Mon, 24 Mar 2025 14:58:12 +0100 Subject: [PATCH 4/6] Add suggested changes, fix `removeFromCuration` for CDDL This generalizes the logic that parses property values of a crawl index to gather extract files, and applies it throughout. This actually fixes the `removeFromCuration` logic that was broken for CDDL. 
--- tools/apply-patches.js | 26 +++++--------------------- tools/prepare-curated.js | 25 ++++++++++++------------- tools/utils.js | 24 +++++++++++++++++++++++- 3 files changed, 40 insertions(+), 35 deletions(-) diff --git a/tools/apply-patches.js b/tools/apply-patches.js index 5e57b6db1f93..c7aa2605acc7 100644 --- a/tools/apply-patches.js +++ b/tools/apply-patches.js @@ -19,7 +19,7 @@ import path from 'node:path'; import util from 'node:util'; import { fileURLToPath } from 'node:url'; import { execFile as execCb } from 'node:child_process'; -import { createFolderIfNeeded, loadJSON } from './utils.js'; +import { createFolderIfNeeded, loadJSON, getTargetedExtracts } from './utils.js'; const execFile = util.promisify(execCb); async function applyPatches(rawFolder, outputFolder, type) { @@ -121,7 +121,6 @@ async function applyFreezePatches(rawFolder, outputFolder) { } const shortname = file.replace(/\.json$/, ''); - const branchName = `freezepatch-${shortname}`; const patch = path.join(patchDir, file); const json = await loadJSON(patch); @@ -131,28 +130,14 @@ async function applyFreezePatches(rawFolder, outputFolder) { // Get back to the patch commit // (note this does not touch the `curated` folder because it is in // the `.gitignore` file) - await execFile('git', ['checkout', '-B', branchName, json.commit]); + await execFile('git', ['checkout', json.commit]); const crawlIndex = await loadJSON(path.join(rawFolder, 'index.json')); const crawlSpec = crawlIndex.results.find(spec => spec.shortname === shortname); - for (const [extractType, extractFile] of Object.entries(crawlSpec)) { - if (extractType === 'cddl') { - // Handle CDDL extracts separately, it's an array of extracts - for (const { file: cddlFile } of extractFile) { - await fs.copyFile( - path.join(rawFolder, cddlFile), - path.join(outputFolder, cddlFile) - ); - } - } - else if (!extractFile || - (typeof extractFile !== 'string') || - !extractFile.match(/^[^\/]+\/[^\/]+\.(json|idl)$/)) { - // Skip properties 
that do not link to an extract - continue; - } - else { + for (const propValue of Object.values(crawlSpec)) { + const extractFiles = getTargetedExtracts(propValue); + for (const extractFile of extracFiles) { await fs.copyFile( path.join(rawFolder, extractFile), path.join(outputFolder, extractFile) @@ -162,7 +147,6 @@ async function applyFreezePatches(rawFolder, outputFolder) { } await execFile('git', ['checkout', 'main']); - await execFile('git', ['branch', '-D', branchName]); patchApplied = true; } diff --git a/tools/prepare-curated.js b/tools/prepare-curated.js index a26a1a184ecf..bada4ff0d389 100644 --- a/tools/prepare-curated.js +++ b/tools/prepare-curated.js @@ -23,7 +23,8 @@ import { rimraf } from 'rimraf'; import { createFolderIfNeeded, loadJSON, - copyFolder } from './utils.js'; + copyFolder, + getTargetedExtracts } from './utils.js'; import { applyPatches } from './apply-patches.js'; import { dropCSSPropertyDuplicates } from './drop-css-property-duplicates.js'; import { curateEvents } from './amend-event-data.js'; @@ -35,10 +36,9 @@ import { crawlSpecs } from 'reffy'; */ async function removeFromCuration(spec, curatedFolder) { for (const property of ['cddl', 'css', 'elements', 'events', 'idl']) { - if (spec[property] && - (typeof spec[property] === 'string') && - spec[property].match(/^[^\/]+\/[^\/]+\.(json|idl|cddl)$/)) { - const filename = path.join(curatedFolder, spec[property]); + const extractFiles = getTargetedExtracts(spec[property]); + for (const extractFile of extractFiles) { + const filename = path.join(curatedFolder, extractFile); console.log(`Removing ${spec.standing} ${spec.title} from curation: del ${filename}`); await fs.unlink(filename); } @@ -56,16 +56,15 @@ async function cleanCrawlOutcome(spec) { for (const property of Object.keys(spec)) { // Only consider properties that link to an extract - if (spec[property] && - (typeof spec[property] === 'string') && - 
spec[property].match(/^[^\/]+\/[^\/]+\.(json|idl|cddl)$/)) { - try { - await fs.lstat(path.join(curatedFolder, spec[property])); - } - catch (err) { - delete spec[property]; + const extractFiles = getTargetedExtracts(spec[property]); + try { + for (const extractFile of extractFiles) { + await fs.lstat(path.join(curatedFolder, extractFile)); } } + catch (err) { + delete spec[property]; + } } } diff --git a/tools/utils.js b/tools/utils.js index 7b942e062ccd..b223edeba113 100644 --- a/tools/utils.js +++ b/tools/utils.js @@ -68,8 +68,30 @@ async function copyFolder(source, target, { excludeRoot = false } = {}) { }; +/** + * Return the list of extract files that the given value targets. + * + * Note: The `cddl` property value targets an array of extracts, the actual + * extract being under the `file` key each time. + */ +function getTargetedExtracts(value) { + const reExtractFile = /^[^\/]+\/[^\/]+\.(json|idl|cddl)$/; + if (Array.isArray(value)) { + return value + .filter(v => typeof v.file === 'string' && v.file.match(reExtractFile)) + .map(v => v.file); + } + else if (typeof value === 'string') { + return [value]; + } + else { + return []; + } +} + export { createFolderIfNeeded, loadJSON, - copyFolder + copyFolder, + getTargetedExtracts }; \ No newline at end of file From 027c5704c5891dac8da928e5c80845b53320fba2 Mon Sep 17 00:00:00 2001 From: Francois Daoust Date: Mon, 24 Mar 2025 15:08:01 +0100 Subject: [PATCH 5/6] Fix typo --- tools/apply-patches.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/apply-patches.js b/tools/apply-patches.js index c7aa2605acc7..a8a8d72af1df 100644 --- a/tools/apply-patches.js +++ b/tools/apply-patches.js @@ -137,7 +137,7 @@ async function applyFreezePatches(rawFolder, outputFolder) { for (const propValue of Object.values(crawlSpec)) { const extractFiles = getTargetedExtracts(propValue); - for (const extractFile of extracFiles) { + for (const extractFile of extractFiles) { await fs.copyFile( path.join(rawFolder, 
extractFile), path.join(outputFolder, extractFile) From 9c98fbae11134eae286e77a1e3573161ed0dabe1 Mon Sep 17 00:00:00 2001 From: Francois Daoust Date: Mon, 24 Mar 2025 15:34:53 +0100 Subject: [PATCH 6/6] Fix logic for returning target files --- tools/utils.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/utils.js b/tools/utils.js index b223edeba113..96f5fb3cecf7 100644 --- a/tools/utils.js +++ b/tools/utils.js @@ -81,7 +81,7 @@ function getTargetedExtracts(value) { .filter(v => typeof v.file === 'string' && v.file.match(reExtractFile)) .map(v => v.file); } - else if (typeof value === 'string') { + else if (typeof value === 'string' && value.match(reExtractFile)) { return [value]; } else {