|
| 1 | +import { currentPath, isArray, isString } from '@universalweb/acid'; |
1 | 2 | import { decode, encode } from '#utilities/serialize';
|
2 | 3 | import { parse, parseFragment } from 'parse5';
|
3 |
| -// EXPERIMENTAL |
| 4 | +import { read } from '#utilities/file'; |
| 5 | +import zlib from 'zlib'; |
| 6 | +const dirname = currentPath(import.meta); |
| 7 | +// EXPERIMENTAL CONVERT HTML STRING TO COMPRESSED UML THEN COMPARE |
/**
 * Tells whether a converted child should be kept in the output.
 *
 * @param {*} value - Result of converting one node.
 * @returns {boolean} False for null/undefined, which mark nodes that produce no output.
 */
function isPresent(value) {
	return value !== null && value !== undefined;
}
/**
 * Builds the string stored in slot 0 of an element's array: the tag name followed by
 * `#id`, `.class` and the remaining attributes.
 *
 * The attribute list is an array of `{ name, value }` records, so `id` and `class` have
 * to be looked up by name while iterating; reading them as properties of the array
 * itself always yields undefined and silently drops them.
 *
 * @param {string} tagName - Element tag name.
 * @param {Array<{name: string, value: string}>} [attrs] - Element attributes.
 * @returns {string} For example `div#main.card.wide title="x" `.
 */
function describeElement(tagName, attrs) {
	if (!attrs) {
		return tagName;
	}
	let idValue;
	let classValue;
	let otherAttributes = '';
	for (const { name, value } of attrs) {
		if (name === 'id') {
			idValue = value;
		} else if (name === 'class') {
			classValue = value;
		} else {
			// Every remaining attribute keeps its trailing space, matching the existing output format.
			otherAttributes += `${name}="${value}" `;
		}
	}
	let label = tagName;
	if (idValue) {
		label += `#${idValue}`;
	}
	// Join multiple class names with dots so they cannot be confused with the space that separates attributes.
	const classNames = classValue ? classValue.trim().split(/\s+/).filter(Boolean) : [];
	if (classNames.length) {
		label += `.${classNames.join('.')}`;
	}
	if (attrs.length) {
		label += ' ';
	}
	return label + otherAttributes;
}
/**
 * Converts a parsed HTML node, or an array of nodes, into a compact nested-array form.
 *
 * - An array of nodes becomes an array of converted nodes, with empty results removed.
 * - A `#text` node becomes its text, or undefined when it only contains whitespace.
 * - A `#documentType` node becomes `['!DOCTYPE <name>']`.
 * - `#document`, `#document-fragment` and any other tag-less node that has children are
 *   replaced by their converted children.
 * - An element becomes `[label, ...children]`, where label comes from describeElement.
 *
 * @param {object|Array<object>|null|undefined} node - Node or list of nodes to convert.
 * @param {number} [index] - Unused; present so the function can be passed directly to Array#map.
 * @returns {Array|string|undefined} The converted structure.
 */
function htmlToJson(node, index) {
	if (node === null || node === undefined) {
		return;
	}
	if (Array.isArray(node)) {
		// Whitespace-only text nodes convert to undefined; drop them here exactly as for element children.
		return node.map((child) => {
			return htmlToJson(child);
		}).filter(isPresent);
	}
	const { nodeName, tagName, childNodes } = node;
	if (nodeName === '#text') {
		return node.value.trim() ? node.value : undefined;
	}
	if (nodeName === '#documentType') {
		return [`!DOCTYPE ${node.name}`];
	}
	const isContainer = nodeName === '#document' || nodeName === '#document-fragment';
	if (isContainer || (!tagName && childNodes?.length)) {
		return htmlToJson(childNodes ?? []);
	}
	const children = htmlToJson(childNodes ?? []);
	if (!tagName) {
		// Tag-less leaf such as a comment: nothing to label, and children is empty here.
		return children;
	}
	return [describeElement(tagName, node.attrs), ...children];
}
|
/**
 * Currently an identity step: resolves with the exact value it was given.
 *
 * @param {*} source - Value to pass through.
 * @returns {Promise<*>} Promise resolving to `source` unchanged.
 */
async function unWrap(source) {
	const passedThrough = source;
	return passedThrough;
}
/**
 * Parses an HTML string as a full document and converts it with htmlToJson.
 *
 * Despite its name, this function does not serialize anything; it returns the
 * converted structure and leaves encoding to the caller.
 *
 * @param {string} html - HTML source text.
 * @returns {Promise<*>} The converted structure after unWrap, or undefined when the
 * conversion produced a falsy result.
 */
async function convertHtmlToMsgPack(html) {
	const documentTree = await parse(html);
	const structure = await htmlToJson(documentTree);
	if (!structure) {
		return;
	}
	const unWrapped = await unWrap(structure);
	return unWrapped;
}
|
// Size comparison: raw HTML vs. the serialized structure, each before and after Brotli.
const { brotliCompressSync, brotliDecompressSync } = zlib;
// Load the sample document
const htmlFile = await read(`${dirname}/resources/randomData.html`);
const htmlContent = htmlFile.toString('utf8');
// Convert the document, then serialize the resulting structure
const packedDataStructure = await convertHtmlToMsgPack(htmlContent);
const packedData = encode(packedDataStructure);
console.log('UML uncompressed size:', packedData.length);
console.log('RAW HTML SIZE', htmlFile.length);
// Baseline: Brotli applied directly to the HTML file contents
const compressedString = brotliCompressSync(htmlFile);
console.log('compressedString', compressedString.length);
// These options are not passed to the Brotli calls below
const dictionary = Buffer.from('');
const compressOptions = {
	level: 9,
	memLevel: 9,
	windowBits: 15,
	dictionary,
};
// Brotli applied to the serialized structure, followed by a decompress + decode round trip
const compressedUML = brotliCompressSync(packedData);
const decompress = brotliDecompressSync(compressedUML);
const decoded = decode(decompress);
console.log('compressedUML', compressedUML.length, decompress.length);
0 commit comments