1
1
import { dirname } from "node:path" ;
2
2
import { mkdir , writeFile } from "node:fs/promises" ;
3
3
import {
4
- findTags ,
4
+ findByClasses ,
5
+ findByTags ,
5
6
getTextContent ,
6
7
loadHtmlIndices ,
8
+ loadSvgSinglePage ,
7
9
parseHtml ,
8
10
} from "./crawler" ;
9
11
10
12
// Crawl WHATWG HTML.
11
13
12
- const html = await loadHtmlIndices ( ) ;
13
- const document = parseHtml ( html ) ;
14
-
15
14
type Element = {
16
15
description : string ;
17
16
categories : string [ ] ;
@@ -24,12 +23,15 @@ const elementsByTag: Record<string, Element> = {};
24
23
* scrape elements table with content model
25
24
*/
26
25
{
27
- const table = findTags ( document , "table" ) . find ( ( table ) => {
28
- const [ caption ] = findTags ( table , "caption" ) ;
26
+ const html = await loadHtmlIndices ( ) ;
27
+ const document = parseHtml ( html ) ;
28
+
29
+ const table = findByTags ( document , "table" ) . find ( ( table ) => {
30
+ const [ caption ] = findByTags ( table , "caption" ) ;
29
31
return getTextContent ( caption ) . toLowerCase ( ) . includes ( "list of elements" ) ;
30
32
} ) ;
31
- const [ tbody ] = findTags ( table , "tbody" ) ;
32
- const rows = findTags ( tbody , "tr" ) ;
33
+ const [ tbody ] = findByTags ( table , "tbody" ) ;
34
+ const rows = findByTags ( tbody , "tr" ) ;
33
35
const parseList = ( text : string ) => {
34
36
return text
35
37
. trim ( )
@@ -54,6 +56,7 @@ const elementsByTag: Record<string, Element> = {};
54
56
return item ;
55
57
}
56
58
) ;
59
+ categories . unshift ( "html-element" ) ;
57
60
let children = parseList ( getTextContent ( row . childNodes [ 4 ] ) ) ;
58
61
for ( const tag of elements ) {
59
62
// textarea does not have a value attribute; its text content is used as the initial value
@@ -78,6 +81,39 @@ const elementsByTag: Record<string, Element> = {};
78
81
}
79
82
}
80
83
84
/**
 * scrape SVG element summaries with content model
 *
 * Every node with class "element-summary" yields one entry in elementsByTag:
 * the tag name is read from its "element-summary-name" node and the children
 * from the "element-name" nodes found in the node that directly follows the
 * "content model" entry of the summary's first <dl>.
 * Tags that are already present in elementsByTag are left untouched.
 */
{
  const svg = await loadSvgSinglePage();
  const document = parseHtml(svg);
  for (const summary of findByClasses(document, "element-summary")) {
    // names are wrapped in one extra character on each side
    // (presumably quotes - confirm against the specification markup)
    const [name] = findByClasses(summary, "element-summary-name").map((item) =>
      getTextContent(item).slice(1, -1)
    );
    // without this guard a nameless summary would be stored
    // under the "undefined" (or empty) key
    if (!name) {
      console.warn("element summary without name in SVG specification is skipped");
      continue;
    }
    // checked before scanning the content model so that
    // already registered tags cost no extra work
    if (elementsByTag[name]) {
      console.info(`${name} element from SVG specification is skipped`);
      continue;
    }

    const children: string[] = [];
    const [dl] = findByTags(summary, "dl");
    // a summary without definition list has no known children
    const entries = dl?.childNodes ?? [];
    for (let index = 0; index < entries.length; index += 1) {
      const isContentModel = getTextContent(entries[index])
        .toLowerCase()
        .includes("content model");
      if (!isContentModel) {
        continue;
      }
      // the definition is expected right after the term;
      // the term may also be the very last node
      const dd = entries[index + 1];
      if (dd === undefined) {
        continue;
      }
      for (const elementName of findByClasses(dd, "element-name")) {
        children.push(getTextContent(elementName).slice(1, -1));
      }
    }

    // only the root svg element can be placed into html content
    const categories = name === "svg" ? ["flow", "phrasing"] : ["none"];
    categories.unshift("svg-element");
    elementsByTag[name] = {
      description: "",
      categories,
      children,
    };
  }
}
116
+
81
117
const contentModel = `type Element = {
82
118
description: string;
83
119
categories: string[];
0 commit comments