|
1 | 1 | 'use strict' |
2 | 2 |
|
3 | | -const {convertHtmlToMarkdown} = require('dom-to-semantic-markdown') |
4 | | -const {JSDOM} = require('jsdom') |
5 | | -const File = require('vinyl') |
6 | | - |
7 | | -function overrideElementProcessing (element) { |
| 3 | +const { NodeHtmlMarkdown } = require('node-html-markdown') |
| 4 | +const nhm = new NodeHtmlMarkdown() |
8 | 5 |
|
9 | | - if (element.tagName?.toLowerCase() === 'a') { |
10 | | - if (element.className === 'anchor') { |
11 | | - return [{type: 'custom', blank: true}] |
12 | | - } |
13 | | - let href = element.getAttribute('href') |
14 | | - const hasProtocol = /^[a-z]+:\/\//i |
15 | | - if (href && !href.match(hasProtocol)) { |
16 | | - href = href.replace(/\.html/, '.md') |
17 | | - const content = toMarkdown(element.innerHTML || href) |
18 | | - return [{type: 'link', href, content}] |
19 | | - } |
20 | | - } |
21 | | - |
22 | | - if (element.classList?.contains("admonitionblock")) { |
23 | | - element.classList.remove('admonitionblock') |
24 | | - const admonition = element.className.toUpperCase() |
25 | | - const content = toMarkdown( |
26 | | - element.querySelector("td.content").innerHTML) |
27 | | - |
28 | | - return [{ |
29 | | - type: 'custom', |
30 | | - admonition, |
31 | | - content |
32 | | - }] |
33 | | - } |
34 | | -} |
35 | | - |
36 | | -function renderCustomNode (node) { |
37 | | - if (node.blank) { |
38 | | - return '' |
39 | | - } |
40 | | - if (node.admonition) { |
41 | | - const body = node.content.split('\n').map(line => `> ${line}`).join('\n') |
42 | | - return `\n> [!${node.admonition}]\n${body}\n\n` |
43 | | - } |
44 | | -} |
45 | | - |
46 | | -function toMarkdown (html) { |
47 | | - const dom = new JSDOM(html) |
48 | | - const markdown = convertHtmlToMarkdown( |
49 | | - html, |
50 | | - { |
51 | | - overrideDOMParser: new dom.window.DOMParser(), |
52 | | - overrideElementProcessing, |
53 | | - renderCustomNode |
54 | | - } |
55 | | - ) |
56 | | - dom.window.close() |
57 | | - return markdown |
58 | | -} |
| 6 | +const File = require('vinyl') |
59 | 7 |
|
60 | 8 | function markdownify(page) { |
61 | 9 | const html = page.contents.toString() |
62 | 10 |
|
63 | 11 | const link = `[View original HTML](${page.pub.url})\n\n` |
64 | | - const markdown = toMarkdown(html) |
| 12 | + |
| 13 | + const markdown = link + nhm.translate(html) |
65 | 14 |
|
66 | 15 | page.out.path = page.out.path.replace(/\.html$/, '.md') |
67 | 16 | page.pub.url = page.pub.url.replace(/\.html$/, '.md') |
@@ -106,8 +55,18 @@ module.exports.register = function ({ playbook, config }) { |
106 | 55 | && page.pub |
107 | 56 | && page.out) |
108 | 57 |
|
109 | | - for (const page of pages) { |
110 | | - markdownify(page) |
| 58 | + const CHUNK_SIZE = 100 |
| 59 | + |
| 60 | + for (let i = 0; i < pages.length; i += CHUNK_SIZE) { |
| 61 | + const chunk = pages.slice(i, i + CHUNK_SIZE) |
| 62 | + |
| 63 | + for (const page of chunk) { |
| 64 | + markdownify(page) |
| 65 | + } |
| 66 | + |
| 67 | + if (i + CHUNK_SIZE < pages.length) { |
| 68 | + await new Promise(resolve => setImmediate(resolve)) |
| 69 | + } |
111 | 70 | } |
112 | 71 | }) |
113 | 72 | this.once('beforePublish', async ({ siteCatalog }) => { |
|
0 commit comments