Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions packages/html-data/bin/aria.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import {
} from "@webstudio-is/sdk";
import { generateWebstudioComponent } from "@webstudio-is/react-sdk";
import {
findTags,
findByTags,
getAttr,
getTextContent,
loadPage,
Expand All @@ -34,11 +34,11 @@ const overrides: Record<string, Partial<Attribute>> = {

const html = await loadPage("aria1.3", "https://www.w3.org/TR/wai-aria-1.3");
const document = parseHtml(html);
const list = findTags(document, "dl").find(
const list = findByTags(document, "dl").find(
(table) => getAttr(table, "id")?.value === "index_state_prop"
);
const terms = findTags(list, "dt");
const details = findTags(list, "dd");
const terms = findByTags(list, "dt");
const details = findByTags(list, "dd");
const descriptions = new Map<string, string>();
for (let index = 0; index < terms.length; index += 1) {
const term = getTextContent(terms[index]);
Expand Down
8 changes: 4 additions & 4 deletions packages/html-data/bin/attributes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import {
} from "@webstudio-is/sdk";
import { generateWebstudioComponent } from "@webstudio-is/react-sdk";
import {
findTags,
findByTags,
getAttr,
getTextContent,
loadHtmlIndices,
Expand Down Expand Up @@ -123,11 +123,11 @@ const overrides: Record<
// Crawl WHATWG HTML.
const html = await loadHtmlIndices();
const document = parseHtml(html);
const table = findTags(document, "table").find(
const table = findByTags(document, "table").find(
(table) => getAttr(table, "id")?.value === "attributes-1"
);
const [tbody] = findTags(table, "tbody");
const rows = findTags(tbody, "tr");
const [tbody] = findByTags(table, "tbody");
const rows = findByTags(tbody, "tr");

const attributesByTag: Record<string, Attribute[]> = {};
// textarea does not have value attribute and text content is used as initial value
Expand Down
28 changes: 26 additions & 2 deletions packages/html-data/bin/crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ type Element = DefaultTreeAdapterMap["element"];

type Attribute = Element["attrs"][number];

export const findTags = (
export const findByTags = (
node: undefined | Node,
tagName: string,
result: NodeWithChildren[] = []
Expand All @@ -23,7 +23,28 @@ export const findTags = (
result.push(node);
}
for (const child of node.childNodes) {
findTags(child, tagName, result);
findByTags(child, tagName, result);
}
}
return result;
};

/**
 * Recursively collects nodes whose `class` attribute equals `className`.
 *
 * The node itself is tested before its children, and children are visited in
 * `childNodes` order, so matches are appended in document (pre-order) order.
 *
 * NOTE(review): the attribute value is compared as a whole string, so an
 * element with `class="a b"` does not match `className = "a"`. Confirm this is
 * intended before using it on markup with multi-class elements.
 *
 * @param node - Root of the subtree to search; `undefined` or a node without
 * `childNodes` contributes nothing.
 * @param className - Exact value the `class` attribute must have.
 * @param result - Accumulator the matches are pushed into; it is mutated and
 * returned.
 * @returns The `result` array containing every matching node found.
 */
export const findByClasses = (
node: undefined | Node,
className: string,
result: NodeWithChildren[] = []
): NodeWithChildren[] => {
// only nodes that can have children are inspected and descended into
if (node && "childNodes" in node) {
if (
// "tagName" is checked before reading attrs
"tagName" in node &&
node.attrs.some(
(item) => item.name === "class" && item.value === className
)
) {
result.push(node);
}
// descend into every child, sharing the same accumulator
for (const child of node.childNodes) {
findByClasses(child, className, result);
}
}
return result;
Expand Down Expand Up @@ -72,3 +93,6 @@ export const loadHtmlIndices = () =>
"html-spec-indices",
"https://html.spec.whatwg.org/multipage/indices.html"
);

/**
 * Loads the single-page edition of the SVG 1.1 specification by delegating to
 * `loadPage` with the "svg-spec" name and the W3C URL.
 *
 * @returns Whatever `loadPage` returns for that page.
 */
export const loadSvgSinglePage = () => {
  const pageName = "svg-spec";
  const pageUrl = "https://www.w3.org/TR/SVG11/single-page.html";
  return loadPage(pageName, pageUrl);
};
52 changes: 44 additions & 8 deletions packages/html-data/bin/elements.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import { dirname } from "node:path";
import { mkdir, writeFile } from "node:fs/promises";
import {
findTags,
findByClasses,
findByTags,
getTextContent,
loadHtmlIndices,
loadSvgSinglePage,
parseHtml,
} from "./crawler";

// Crawl WHATWG HTML.

const html = await loadHtmlIndices();
const document = parseHtml(html);

type Element = {
description: string;
categories: string[];
Expand All @@ -24,12 +23,15 @@ const elementsByTag: Record<string, Element> = {};
* scrape elements table with content model
*/
{
const table = findTags(document, "table").find((table) => {
const [caption] = findTags(table, "caption");
const html = await loadHtmlIndices();
const document = parseHtml(html);

const table = findByTags(document, "table").find((table) => {
const [caption] = findByTags(table, "caption");
return getTextContent(caption).toLowerCase().includes("list of elements");
});
const [tbody] = findTags(table, "tbody");
const rows = findTags(tbody, "tr");
const [tbody] = findByTags(table, "tbody");
const rows = findByTags(tbody, "tr");
const parseList = (text: string) => {
return text
.trim()
Expand All @@ -54,6 +56,7 @@ const elementsByTag: Record<string, Element> = {};
return item;
}
);
categories.unshift("html-element");
let children = parseList(getTextContent(row.childNodes[4]));
for (const tag of elements) {
// textarea does not have value attribute and text content is used as initial value
Expand All @@ -78,6 +81,39 @@ const elementsByTag: Record<string, Element> = {};
}
}

/**
 * scrape element summaries from the SVG specification and register every
 * element that is not already present in elementsByTag
 */
{
  const svg = await loadSvgSinglePage();
  const document = parseHtml(svg);
  for (const summary of findByClasses(document, "element-summary")) {
    const [nameNode] = findByClasses(summary, "element-summary-name");
    // a summary without a name node would otherwise be registered under the
    // literal key "undefined"
    if (nameNode === undefined) {
      console.info("SVG element summary without a name is skipped");
      continue;
    }
    // drop the first and last character of the rendered name
    // (presumably the quote marks around it; confirm against the spec markup)
    const name = getTextContent(nameNode).slice(1, -1);
    if (name === "") {
      console.info("SVG element summary with an empty name is skipped");
      continue;
    }
    // entries scraped earlier take precedence over the SVG specification
    if (elementsByTag[name]) {
      console.info(`${name} element from SVG specification is skipped`);
      continue;
    }

    // a summary without <dl> simply has no known children instead of
    // crashing on dl.childNodes
    const lists = findByTags(summary, "dl");
    const entries = lists.length > 0 ? lists[0].childNodes : [];
    const children: string[] = [];
    entries.forEach((entry, index) => {
      if (!getTextContent(entry).toLowerCase().includes("content model")) {
        return;
      }
      // the node right after the "content model" term holds the allowed
      // elements; findByClasses tolerates a missing (undefined) node
      for (const elementName of findByClasses(
        entries[index + 1],
        "element-name"
      )) {
        children.push(getTextContent(elementName).slice(1, -1));
      }
    });

    // only the svg element itself gets the flow and phrasing categories,
    // every other SVG element gets "none"
    const categories = [
      "svg-element",
      ...(name === "svg" ? ["flow", "phrasing"] : ["none"]),
    ];
    elementsByTag[name] = {
      description: "",
      categories,
      children,
    };
  }
}

const contentModel = `type Element = {
description: string;
categories: string[];
Expand Down
Loading