1- import type { Cheerio , Element } from "cheerio" ;
1+ import axios from "axios" ;
2+ import type { CheerioAPI } from "cheerio" ;
23import { glob } from "glob" ;
34
45import { readFile } from "fs/promises" ;
56import { basename } from "path" ;
67
7- import { flattenDomFromFile , load } from "./cheerio" ;
8+ import { flattenDomFromFile , load , type CheerioAnyNode } from "./cheerio" ;
89import { generateId } from "./common" ;
910
1011export type WcagVersion = "20" | "21" | "22" ;
@@ -34,40 +35,21 @@ export const actRules = (
3435) [ "act-rules" ] ;
3536
/**
 * Flattened object hash, mapping each WCAG 2 SC slug to the earliest WCAG version it applies to.
 * (Functionally equivalent to "guidelines-versions" target in build.xml; structurally inverted)
 *
 * Resolved once at module load by globbing `understanding/<version>/<slug>.html`;
 * the directory name supplies the version and the file's basename supplies the slug.
 */
const scVersions = await (async function () {
  const paths = await glob("*/*.html", { cwd: "understanding" });
  const map: Record<string, WcagVersion> = {};

  for (const path of paths) {
    const [fileVersion, filename] = path.split("/");
    assertIsWcagVersion(fileVersion);

    const slug = basename(filename, ".html");
    const knownVersion = map[slug];
    // The order of glob results is not guaranteed, so only overwrite an existing
    // entry when this file belongs to an earlier version ("20" < "21" < "22").
    if (knownVersion === undefined || fileVersion < knownVersion) {
      map[slug] = fileVersion;
    }
  }

  return map;
})();
7153
7254export interface DocNode {
7355 id : string ;
@@ -79,15 +61,15 @@ export interface DocNode {
/** A top-level WCAG principle together with the guidelines nested under it. */
export interface Principle extends DocNode {
  type: "Principle";
  /** Principle number; typed as string for consistency with guidelines/SC */
  num: `${number}`;
  /** Every principle is recorded against WCAG 2.0 */
  version: "20";
  content: string;
  guidelines: Guideline[];
}
8668
/** A WCAG guideline together with the success criteria nested under it. */
export interface Guideline extends DocNode {
  type: "Guideline";
  /** Dotted number: the parent principle's number followed by this guideline's own */
  num: `${Principle["num"]}.${number}`;
  /** WCAG version recorded for the guideline */
  version: "20" | "21";
  content: string;
  successCriteria: SuccessCriterion[];
}
@@ -97,50 +79,63 @@ export interface SuccessCriterion extends DocNode {
9779 num : `${Guideline [ "num" ] } .${number } `;
9880 /** Level may be empty for obsolete criteria */
9981 level : "A" | "AA" | "AAA" | "" ;
100- version : `WCAG${ WcagVersion } ` ;
82+ version : WcagVersion ;
10183 type : "SC" ;
10284}
10385
/**
 * Type guard for success criteria.
 * @param criterion Any value; null and undefined are tolerated
 * @returns true only when the value is tagged with type "SC" and has a `level` property
 */
export function isSuccessCriterion(criterion: any): criterion is SuccessCriterion {
  // Anything not tagged as an SC (including null/undefined/primitives) is rejected up front
  if (criterion?.type !== "SC") return false;
  return "level" in criterion;
}
10789
/**
 * Version-dependent overrides of SC shortcodes for older versions.
 * Each entry is keyed by the current slug and returns the slug to use for the given version.
 */
export const scSlugOverrides: Record<string, (version: WcagVersion) => string> = {
  "target-size-enhanced": (version) => {
    // Version strings compare lexicographically, so this matches every version before 2.2
    if (version < "22") return "target-size";
    return "target-size-enhanced";
  },
};
94+
/**
 * Selectors ignored when capturing content of each Principle / Guideline / SC.
 * The entries are joined into a single selector list before use.
 */
const contentIgnores: string[] = [
  "h1, h2, h3, h4, h5, h6",
  "section",
  ".change",
  ".conformance-level",

  // Selectors below are specific to pre-published guidelines (for previous versions)
  ".header-wrapper",
  ".doclinks",
];
105+
/**
 * Returns HTML content used for Understanding guideline/SC boxes and term definitions.
 * @param $el Cheerio element of the full section from flattened guidelines/index.html
 * @returns The section's inner HTML with ignored elements removed and outer whitespace trimmed
 */
const getContentHtml = ($el: CheerioAnyNode) => {
  const ignoredSelector = contentIgnores.join(", ");

  // Work on a separate instance loaded from the inner HTML, so removals never touch the source document
  const $content = load($el.html()!, null, false);
  $content(ignoredSelector).remove();

  return $content.html().trim();
};
118116
119- /**
120- * Resolves information from guidelines/index.html;
121- * comparable to the principles section of wcag.xml from the guidelines-xml Ant task.
122- */
123- export async function getPrinciples ( ) {
124- const versions = await getInvertedGuidelinesVersions ( ) ;
125- const $ = await flattenDomFromFile ( "guidelines/index.html" ) ;
126-
117+ /** Performs processing common across WCAG versions */
118+ function processPrinciples ( $ : CheerioAPI ) {
127119 const principles : Principle [ ] = [ ] ;
128120 $ ( ".principle" ) . each ( ( i , el ) => {
129121 const guidelines : Guideline [ ] = [ ] ;
130- $ ( ".guideline" , el ) . each ( ( j , guidelineEl ) => {
122+ $ ( "> .guideline" , el ) . each ( ( j , guidelineEl ) => {
131123 const successCriteria : SuccessCriterion [ ] = [ ] ;
132- $ ( ".sc" , guidelineEl ) . each ( ( k , scEl ) => {
133- const resolvedVersion = versions [ scEl . attribs . id ] ;
134- assertIsWcagVersion ( resolvedVersion ) ;
135-
124+ // Source uses sc class, published uses guideline class (again)
125+ $ ( "> .guideline, > .sc" , guidelineEl ) . each ( ( k , scEl ) => {
126+ const scId = scEl . attribs . id ;
136127 successCriteria . push ( {
137128 content : getContentHtml ( $ ( scEl ) ) ,
138- id : scEl . attribs . id ,
129+ id : scId ,
139130 name : $ ( "h4" , scEl ) . text ( ) . trim ( ) ,
140131 num : `${ i + 1 } .${ j + 1 } .${ k + 1 } ` ,
141- level : $ ( "p.conformance-level" , scEl ) . text ( ) . trim ( ) as SuccessCriterion [ "level" ] ,
132+ // conformance-level contains only letters in source, full (Level ...) in publish
133+ level : $ ( "p.conformance-level" , scEl )
134+ . text ( )
135+ . trim ( )
136+ . replace ( / ^ \( L e v e l ( .* ) \) $ / , "$1" ) as SuccessCriterion [ "level" ] ,
142137 type : "SC" ,
143- version : `WCAG ${ resolvedVersion } ` ,
138+ version : scVersions [ scId ] ,
144139 } ) ;
145140 } ) ;
146141
@@ -150,7 +145,7 @@ export async function getPrinciples() {
150145 name : $ ( "h3" , guidelineEl ) . text ( ) . trim ( ) ,
151146 num : `${ i + 1 } .${ j + 1 } ` ,
152147 type : "Guideline" ,
153- version : guidelineEl . attribs . id === "input-modalities" ? "WCAG21 " : "WCAG20 " ,
148+ version : guidelineEl . attribs . id === "input-modalities" ? "21 " : "20 " ,
154149 successCriteria,
155150 } ) ;
156151 } ) ;
@@ -161,14 +156,21 @@ export async function getPrinciples() {
161156 name : $ ( "h2" , el ) . text ( ) . trim ( ) ,
162157 num : `${ i + 1 } ` ,
163158 type : "Principle" ,
164- version : "WCAG20 " ,
159+ version : "20 " ,
165160 guidelines,
166161 } ) ;
167162 } ) ;
168163
169164 return principles ;
170165}
171166
/**
 * Resolves information from guidelines/index.html;
 * comparable to the principles section of wcag.xml from the guidelines-xml Ant task.
 * @returns The principles produced by processPrinciples for the flattened local source document
 */
export const getPrinciples = async () => {
  const $ = await flattenDomFromFile("guidelines/index.html");
  return processPrinciples($);
};
173+
172174/**
173175 * Returns a flattened object hash, mapping shortcodes to each principle/guideline/SC.
174176 */
@@ -225,3 +227,62 @@ export async function getTermsMap() {
225227
226228 return terms ;
227229}
230+
231+ // Version-specific APIs
232+
/**
 * Per-version cache of in-flight or completed loads of published guidelines.
 * The promise itself is cached (not the resolved document) so that callers
 * requesting the same version concurrently share one download.
 */
const remoteGuidelines$: Partial<Record<WcagVersion, Promise<CheerioAPI>>> = {};

/**
 * Downloads the published guidelines for one version from TR space and
 * normalizes the markup so it can be processed like the local source.
 */
const fetchRemoteGuidelines = async (version: WcagVersion): Promise<CheerioAPI> => {
  const $ = load(
    (await axios.get(`https://www.w3.org/TR/WCAG${version}/`, { responseType: "text" })).data
  );

  // Re-collapse definition links and notes, to be processed by this build system
  $(".guideline a.internalDFN").removeAttr("class data-link-type id href title");
  $(".guideline [role='note'] .marker").remove();
  $(".guideline [role='note']").find("> div, > p").addClass("note").unwrap();

  // Bibliography references are not processed in Understanding SC boxes
  $(".guideline cite:has(a.bibref:only-child)").each((_, el) => {
    const $el = $(el);
    const $parent = $el.parent();
    $el.remove();
    // Remove the now-empty square brackets (which aren't in a dedicated element),
    // together with a single whitespace character preceding them, if any
    $parent.html($parent.html()!.replace(/\s?\[\]/g, ""));
  });

  // Remove extra markup from headings so they can be parsed for names
  $("bdi").remove();

  // Remove abbr elements which exist only in TR, not in informative docs
  $("#acknowledgements li abbr").each((_, abbrEl) => {
    $(abbrEl).replaceWith($(abbrEl).text());
  });

  return $;
};

/**
 * Loads guidelines from TR space for specific version, caching for future calls.
 * @param version WCAG version whose published guidelines should be loaded
 * @returns Promise of the processed document; rejects if the download fails
 */
const loadRemoteGuidelines = (version: WcagVersion): Promise<CheerioAPI> => {
  let pending = remoteGuidelines$[version];
  if (!pending) {
    pending = fetchRemoteGuidelines(version).catch((error: unknown) => {
      // Evict failed loads so that a later call can retry instead of replaying the rejection
      delete remoteGuidelines$[version];
      throw error;
    });
    remoteGuidelines$[version] = pending;
  }
  return pending;
};
268+
/**
 * Retrieves heading and content information for acknowledgement subsections,
 * for preserving the section in About pages for earlier versions.
 * @param version WCAG version whose published guidelines are read
 * @returns Map from each acknowledgements subsection's id to the inner HTML of
 *   the element immediately following its header wrapper
 */
export const getAcknowledgementsForVersion = async (version: WcagVersion) => {
  const $ = await loadRemoteGuidelines(version);
  const subsections: Record<string, string> = {};

  for (const sectionEl of $("section#acknowledgements section").toArray()) {
    const bodyHtml = $(".header-wrapper + *", sectionEl).html()!;
    subsections[sectionEl.attribs.id] = bodyHtml;
  }

  return subsections;
};
283+
/**
 * Retrieves and processes a pinned WCAG version using published guidelines.
 * @param version WCAG version whose published guidelines should be processed
 * @returns The principles produced by processPrinciples for that version's document
 */
export const getPrinciplesForVersion = async (version: WcagVersion) => {
  const $ = await loadRemoteGuidelines(version);
  return processPrinciples($);
};
0 commit comments