diff --git a/builder/src/main.rs b/builder/src/main.rs index de062a0f..d1beb707 100644 --- a/builder/src/main.rs +++ b/builder/src/main.rs @@ -258,32 +258,41 @@ fn search_and_replace_file< // // These functions simplify common build-focused development tasks and support // CI builds. -/// After updating files in the client's Node files, perform some fix-ups. -fn patch_client_npm() -> io::Result<()> { - // Apply a the fixes described in - // [issue 27](https://github.com/bjones1/CodeChat_Editor/issues/27). - // - // Insert this line... - let patch = " - selectionNotFocus = this.view.state.facet(editable) ? focused : hasSelection(this.dom, this.view.observer.selectionRange)"; - // After this line. - let before_path = " let selectionNotFocus = !focused && !(this.view.state.facet(editable) || this.dom.tabIndex > -1) && - hasSelection(this.dom, this.view.observer.selectionRange) && !(activeElt && this.dom.contains(activeElt));"; - // First, see if the patch was applied already. - let index_js_path = Path::new("../client/node_modules/@codemirror/view/dist/index.js"); - let index_js = fs::read_to_string(index_js_path)?; - if !index_js.contains(patch) { - let patch_loc = index_js - .find(before_path) +/// Apply the provided patch to a file. +fn patch_file(patch: &str, before_patch: &str, file_path: &str) -> io::Result<()> { + let file_path = Path::new(file_path); + let file_contents = fs::read_to_string(file_path)?; + if !file_contents.contains(patch) { + let patch_loc = file_contents + .find(before_patch) .expect("Patch location not found.") - + before_path.len(); - let patched_index_js = format!( + + before_patch.len(); + let patched_file_contents = format!( "{}{patch}{}", - &index_js[..patch_loc], - &index_js[patch_loc..] + &file_contents[..patch_loc], + &file_contents[patch_loc..] ); - fs::write(index_js_path, &patched_index_js)?; + fs::write(file_path, &patched_file_contents)?; } + Ok(()) +} +/// After updating files in the client's Node files, perform some fix-ups. 
+fn patch_client_npm() -> io::Result<()> { + // Apply a the fixes described in + // [issue 27](https://github.com/bjones1/CodeChat_Editor/issues/27). + patch_file( + " + selectionNotFocus = this.view.state.facet(editable) ? focused : hasSelection(this.dom, this.view.observer.selectionRange)", + " let selectionNotFocus = !focused && !(this.view.state.facet(editable) || this.dom.tabIndex > -1) && + hasSelection(this.dom, this.view.observer.selectionRange) && !(activeElt && this.dom.contains(activeElt));", + "../client/node_modules/@codemirror/view/dist/index.js" + )?; + // In [older releases](https://www.tiny.cloud/docs/tinymce/5/6.0-upcoming-changes/#options), TinyMCE allowed users to change `whitespace_elements`; the whitespace inside these isn't removed by TinyMCE. However, this was removed in v6.0. Therefore, manually patch TinyMCE instead. + patch_file( + " wc-mermaid", + "const whitespaceElementsMap = createLookupTable('whitespace_elements', 'pre script noscript style textarea video audio iframe object code", + "../client/node_modules/tinymce/tinymce.js" + )?; // Copy across the parts of MathJax that are needed, since bundling it is // difficult. diff --git a/client/src/CodeChatEditor.mts b/client/src/CodeChatEditor.mts index d6e2fe23..7f20d98d 100644 --- a/client/src/CodeChatEditor.mts +++ b/client/src/CodeChatEditor.mts @@ -11,10 +11,10 @@ // details. // // You should have received a copy of the GNU General Public License along with -// the CodeChat Editor. If not, see -// [http://www.gnu.org/licenses](http://www.gnu.org/licenses). +// the CodeChat Editor. If not, see[http://www.gnu.org/licenses](http://www.gnu.org/licenses). // -// # `CodeChatEditor.mts` -- the CodeChat Editor Client +// `CodeChatEditor.mts` -- the CodeChat Editor Client +// ================================================== // // The overall process of load a file is: // @@ -37,13 +37,12 @@ // 3. 
The server then transforms these code/doc blocks into source code, then // writes this code to the disk. // -// ## Imports +// Imports +// ------- // // ### JavaScript/TypeScript // // #### Third-party -import prettier from "prettier/esm/standalone.mjs"; -import parserMarkdown from "prettier/esm/parser-markdown.mjs"; import TurndownService from "./turndown/turndown.browser.es.js"; import { gfm } from "./turndown/turndown-plugin-gfm.browser.es.js"; import "./wc-mermaid/wc-mermaid"; @@ -56,19 +55,18 @@ import { } from "./CodeMirror-integration.mjs"; import "./EditorComponents.mjs"; import "./graphviz-webcomponent-setup.mts"; -// This must be imported _after_ the previous setup import, so it's placed here, -// instead of in the third-party category above. +// This must be imported*after* the previous setup import, so it's placed here, instead of in the +// third-party category above. import "graphviz-webcomponent"; import { tinymce, init, Editor } from "./tinymce-config.mjs"; // ### CSS import "./css/CodeChatEditor.css"; -// ## Data structures +// Data structures +// --------------- // -// Define all possible editor modes; these are passed as -// a [query string](https://en.wikipedia.org/wiki/Query_string) -// (`http://path/to/foo.py?mode=toc`, for example) to the page's URL. +// Define all possible editor modes; these are passed as a[query string](https://en.wikipedia.org/wiki/Query_string) (`http://path/to/foo.py?mode=toc`, for example) to the page's URL. enum EditorMode { // Display the source code using CodeChat, but disallow editing. view, @@ -81,8 +79,7 @@ enum EditorMode { raw, } -// Since this is experimental, TypeScript doesn't define it. See the -// [docs](https://developer.mozilla.org/en-US/docs/Web/API/NavigateEvent). +// Since this is experimental, TypeScript doesn't define it. See the[docs](https://developer.mozilla.org/en-US/docs/Web/API/NavigateEvent). 
interface NavigateEvent extends Event { canIntercept: boolean; destination: any; @@ -111,7 +108,8 @@ declare global { } } -// ## Globals +// Globals +// ------- // // The ID of the autosave timer; when this timer expires, the document will be // autosaved. @@ -122,8 +120,7 @@ let autosaveEnabled = true; // Store the lexer info for the currently-loaded language. // -// This mirrors the data provided by the server -- -// see [SourceFileMetadata](../../server/src/webserver.rs#SourceFileMetadata). +// This mirrors the data provided by the server -- see[SourceFileMetadata](../../server/src/webserver.rs#SourceFileMetadata). let current_metadata: { mode: string; }; @@ -133,20 +130,19 @@ let is_dirty = false; // ### Markdown to HTML conversion // -// Instantiate [turndown](https://github.com/mixmark-io/turndown) for HTML to -// Markdown conversion +// Instantiate[turndown](https://github.com/mixmark-io/turndown) for HTML to Markdown conversion const turndownService = new TurndownService({ br: "\\", codeBlockStyle: "fenced", renderAsPure: false, + wordWrap: [80, 40], }); -// Add the plugins from -// [turndown-plugin-gfm](https://github.com/laurent22/joplin/tree/dev/packages/turndown-plugin-gfm) -// to enable conversions for tables, task lists, and strikethroughs. +// Add the plugins from[turndown-plugin-gfm](https://github.com/laurent22/joplin/tree/dev/packages/turndown-plugin-gfm) to enable conversions for tables, task lists, and strikethroughs. turndownService.use(gfm); -// ## Page initialization +// Page initialization +// ------------------- // // Load the dynamic content into the static page. export const page_init = () => { @@ -176,8 +172,7 @@ export const set_is_dirty = (value: boolean = true) => { is_dirty = value; }; -// This is copied from -// [MDN](https://developer.mozilla.org/en-US/docs/Web/API/Document/DOMContentLoaded_event#checking_whether_loading_is_already_complete). 
+// This is copied from[MDN](https://developer.mozilla.org/en-US/docs/Web/API/Document/DOMContentLoaded_event#checking_whether_loading_is_already_complete). const on_dom_content_loaded = (on_load_func: () => void) => { if (document.readyState === "loading") { // Loading hasn't finished yet. @@ -188,7 +183,8 @@ const on_dom_content_loaded = (on_load_func: () => void) => { } }; -// ## File handling +// File handling +// ------------- // // True if this is a CodeChat Editor document (not a source file). const is_doc_only = () => { @@ -205,23 +201,17 @@ const open_lp = async (all_source: CodeChatForWeb) => // web page with the results. const _open_lp = async ( // A data structure provided by the server, containing the source and - // associated metadata. See [`AllSource`](#AllSource). + // associated metadata. See[`AllSource`](#AllSource). all_source: CodeChatForWeb, ) => { - // Use - // [URLSearchParams](https://developer.mozilla.org/en-US/docs/Web/API/URLSearchParams) - // to parse out the search parameters of this window's URL. + // Use[URLSearchParams](https://developer.mozilla.org/en-US/docs/Web/API/URLSearchParams) to parse out the search parameters of this window's URL. const urlParams = new URLSearchParams(window.location.search); - // Get the mode from the page's query parameters. Default to edit using the - // [nullish coalescing operator](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Nullish_coalescing_operator). - // This works, but TypeScript marks it as an error. Ignore this error by - // including the - // [@ts-ignore directive](https://www.typescriptlang.org/docs/handbook/intro-to-js-ts.html#ts-check). + // Get the mode from the page's query parameters. Default to edit using the[nullish coalescing operator](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Nullish_coalescing_operator). This works, but TypeScript marks it as an error. 
Ignore this error by + // including the[@ts-ignore directive](https://www.typescriptlang.org/docs/handbook/intro-to-js-ts.html#ts-check). /// @ts-ignore const editorMode = EditorMode[urlParams.get("mode") ?? "edit"]; - // Get the current_metadata - // from the provided `all_source` struct and store it as a global variable. + // Get thecurrent_metadata from the provided`all_source` struct and store it as a global variable. current_metadata = all_source["metadata"]; const source = all_source["source"]; const codechat_body = document.getElementById( @@ -232,31 +222,22 @@ const _open_lp = async ( clearAutosaveTimer(); // Before calling any MathJax, make sure it's fully loaded. await window.MathJax.startup.promise; - // Per the - // [docs](https://docs.mathjax.org/en/latest/web/typeset.html#updating-previously-typeset-content), - // "If you modify the page to remove content that contains typeset + // Per the[docs](https://docs.mathjax.org/en/latest/web/typeset.html#updating-previously-typeset-content), "If you modify the page to remove content that contains typeset // mathematics, you will need to tell MathJax about that so that it knows // the typeset math that you are removing is no longer on the page." window.MathJax.typesetClear(codechat_body); if (is_doc_only()) { if (tinymce.activeEditor === null) { - // Special case: a CodeChat Editor document's HTML is stored in - // `source.doc`. We don't need the CodeMirror editor at all; - // instead, treat it like a single doc block contents div. + // Special case: a CodeChat Editor document's HTML is stored in`source.doc`. We don't need the CodeMirror editor at all; instead, treat it + // like a single doc block contents div. codechat_body.innerHTML = `
${source.doc}
`; await init({ selector: ".CodeChat-doc-contents", // In the doc-only mode, add autosave functionality. While there - // is an - // [autosave plugin](https://www.tiny.cloud/docs/tinymce/6/autosave/), - // this autosave functionality is completely different from the - // autosave provided here. Per - // [handling editor events](https://www.tiny.cloud/docs/tinymce/6/events/#handling-editor-events), - // this is how to create a TinyMCE event handler. + // is an[autosave plugin](https://www.tiny.cloud/docs/tinymce/6/autosave/), this autosave functionality is completely different from + // the autosave provided here. Per[handling editor events](https://www.tiny.cloud/docs/tinymce/6/events/#handling-editor-events), this is how to create a TinyMCE event handler. setup: (editor: Editor) => { - // The - // [editor core events list](https://www.tiny.cloud/docs/tinymce/6/events/#editor-core-events) - // includes the `Dirty` event. + // The[editor core events list](https://www.tiny.cloud/docs/tinymce/6/events/#editor-core-events) includes the`Dirty` event. editor.on("Dirty", (_event: Event) => { is_dirty = true; startAutosaveTimer(); @@ -265,9 +246,7 @@ const _open_lp = async ( }); tinymce.activeEditor!.focus(); } else { - // Save and restore cursor/scroll location after an update per the - // [docs](https://www.tiny.cloud/docs/tinymce/6/apis/tinymce.dom.bookmarkmanager). - // However, this doesn't seem to work for the cursor location. + // Save and restore cursor/scroll location after an update per the[docs](https://www.tiny.cloud/docs/tinymce/6/apis/tinymce.dom.bookmarkmanager). However, this doesn't seem to work for the cursor location. // Perhaps when TinyMCE normalizes the document, this gets lost? 
const bm = tinymce.activeEditor!.selection.getBookmark(); tinymce.activeEditor!.setContent(source.doc); @@ -279,9 +258,7 @@ const _open_lp = async ( } autosaveEnabled = true; - // If tests should be run, then the - // [following global variable](CodeChatEditor-test.mts#CodeChatEditor_test) - // is function that runs them. + // If tests should be run, then the[following global variable](CodeChatEditor-test.mts#CodeChatEditor_test) is function that runs them. if (typeof window.CodeChatEditor_test === "function") { window.CodeChatEditor_test(); } @@ -304,8 +281,7 @@ const save_lp = async () => { // div. tinymce.activeEditor!.save(); const html = tinymce.activeEditor!.getContent(); - const markdown = turndownService.turndown(html); - source.doc = await prettier_markdown(markdown, 80); + source.doc = turndownService.turndown(html); source.doc_blocks = []; // Retypeset all math after saving the document. mathJaxTypeset(codechat_body); @@ -327,9 +303,7 @@ const save_lp = async () => { return update; }; -// Per -// [MDN](https://developer.mozilla.org/en-US/docs/Web/API/Navigator/platform#examples), -// here's the least bad way to choose between the control key and the command +// Per[MDN](https://developer.mozilla.org/en-US/docs/Web/API/Navigator/platform#examples), here's the least bad way to choose between the control key and the command // key. const os_is_osx = navigator.platform.indexOf("Mac") === 0 || navigator.platform === "iPhone" @@ -341,8 +315,8 @@ const on_save = async (only_if_dirty: boolean = false) => { if (only_if_dirty && !is_dirty) { return; } - // Save the provided contents back to the filesystem, by - // sending an update message over the websocket. + // Save the provided contents back to the filesystem, by sending an update + // message over the websocket. 
const webSocketComm = parent.window.CodeChatEditorFramework.webSocketComm; console.log("Sent Update - saving document."); await new Promise(async (resolve) => { @@ -354,46 +328,19 @@ const on_save = async (only_if_dirty: boolean = false) => { }; const codechat_html_to_markdown = async (source: any) => { - // Join all the doc blocks, then convert them to Markdown, then split them - // back. - // - // Turndown currently removes HTML blocks with no content; add placeholder - // content to avoid this. - const separator = "a"; - const placeholder_html = "

a

"; - const placeholder_markdown = "a"; - // Replace empty doc blocks (which Turndown will remove) with a placeholder - // to prevent their removal; pass non-empty content for standard Turndown - // processing. - const combined_doc_blocks_html = source.doc_blocks - .map((doc_block_JSON: DocBlockJSON) => - doc_block_JSON[4].trim() ? doc_block_JSON[4] : placeholder_html, - ) - .join(separator); - const combined_doc_blocks_markdown = turndownService.turndown( - combined_doc_blocks_html, - ); - const doc_blocks_markdown = combined_doc_blocks_markdown - .split(separator) - .map((s: string) => s.trim()); - // Wrap each doc block based on the available width on this line: 80 - - // indent - delimiter length - 1 space that always follows the delimiter. - // Use a minimum width of 40 characters. - for (const [index, doc_block] of source.doc_blocks.entries()) { - const dbm = doc_blocks_markdown[index]; + const entries = source.doc_blocks.entries(); + for (const [index, doc_block] of entries) { + const wordWrapMargin = Math.max( + 40, + 80 - doc_block[3].length - doc_block[2].length - 1, + ); + turndownService.options['wordWrap'] = [wordWrapMargin, 40]; doc_block[4] = - (await prettier_markdown( - // Replace the placeholder here, so it won't be wrapped by - // Prettier. - dbm == placeholder_markdown ? "" : dbm, - Math.max( - 40, - 80 - doc_block[3].length - doc_block[2].length - 1, - ), - // Prettier trims whitespace; we can't include the newline in - // the replacement above. So, put it here. - )) || "\n"; + (index == entries.length - 1 + ? turndownService.last(doc_block[4]) + : turndownService.next(doc_block[4])) + "\n"; } + turndownService.options['wordWrap'] = [80, 40]; }; // ### Autosave feature @@ -420,37 +367,14 @@ const clearAutosaveTimer = () => { } }; -// User `prettier` to word-wrap Markdown before saving it. 
-const prettier_markdown = async (markdown: string, print_width: number) => { - return await prettier.format(markdown, { - // See - // [prettier from ES modules](https://prettier.io/docs/en/browser.html#es-modules). - parser: "markdown", - // TODO: - // - // - Unfortunately, Prettier doesn't know how to format HTML embedded in - // Markdown; see - // [issue 8480](https://github.com/prettier/prettier/issues/8480). - // - Prettier formats headings using the ATX style; this isn't - // configurable per the - // [source](https://github.com/prettier/prettier/blob/main/src/language-markdown/printer-markdown.js#L228). - plugins: [parserMarkdown], - // See [prettier options](https://prettier.io/docs/en/options.html). - printWidth: print_width, - // Without this option, most lines aren't wrapped. - proseWrap: "always", - }); -}; - -// ## Navigation +// Navigation +// ---------- // // The TOC and this page calls this when a hyperlink is clicked. This saves the // current document before navigating. const on_navigate = (navigateEvent: NavigateEvent) => { if ( - // Some of this was copied from - // [Modern client-side routing: the Navigation API](https://developer.chrome.com/docs/web-platform/navigation-api/#deciding_how_to_handle_a_navigation). - // If we're navigating within the document, ignore this. + // Some of this was copied from[Modern client-side routing: the Navigation API](https://developer.chrome.com/docs/web-platform/navigation-api/#deciding_how_to_handle_a_navigation). If we're navigating within the document, ignore this. navigateEvent.hashChange || // If this is a download, let the browser perform the download. navigateEvent.downloadRequest || @@ -467,8 +391,7 @@ const on_navigate = (navigateEvent: NavigateEvent) => { return; } - // If the IDE initiated this navigation via a `CurrentFile` message, then - // allow it. + // If the IDE initiated this navigation via a`CurrentFile` message, then allow it. 
if (window.CodeChatEditor.allow_navigation) { // We don't need to reset this flag, since this window will be reloaded. return; @@ -530,13 +453,12 @@ const save_then_navigate = (codeChatEditorUrl: URL) => { }); }; -// ## Testing +// Testing +// ------- // -// A great and simple idea taken from -// [SO](https://stackoverflow.com/a/54116079): wrap all testing exports in a -// single variable. This avoids namespace pollution, since only one name is -// exported, and it's clearly marked for testing only. Test code still gets -// access to everything it needs. +// A great and simple idea taken from[SO](https://stackoverflow.com/a/54116079): wrap all testing exports in a single variable. This avoids namespace +// pollution, since only one name is exported, and it's clearly marked for +// testing only. Test code still gets access to everything it needs. export const exportedForTesting = { codechat_html_to_markdown, }; diff --git a/client/src/turndown/turndown.browser.es.d.ts b/client/src/turndown/turndown.browser.es.d.ts index ccadb823..894d8ffa 100644 --- a/client/src/turndown/turndown.browser.es.d.ts +++ b/client/src/turndown/turndown.browser.es.d.ts @@ -1,8 +1,13 @@ -// # `turndown.browser.es.d.ts` - TypeScript ignores for the Turndown library +// `turndown.browser.es.d.ts` - TypeScript ignores for the Turndown library +// ======================================================================== +// // This suppress type errors when using the Turndown library. 
declare class TurndownService { constructor(options: any); - use(_: any): any; - turndown(_: any): any; + use(_: Function|Array): any; + turndown(_: string|HTMLElement): string; + next(_: string|HTMLElement): string; + last(_: string|HTMLElement): string; + options: {[name: string]: any} } export default TurndownService; diff --git a/client/src/turndown/turndown.browser.es.js b/client/src/turndown/turndown.browser.es.js index 5e261968..2217b313 100644 --- a/client/src/turndown/turndown.browser.es.js +++ b/client/src/turndown/turndown.browser.es.js @@ -77,32 +77,51 @@ function has (node, tagNames) { function Node (node, options) { node.isBlock = isBlock(node); - node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode; + node.isCode = node.nodeName === 'CODE' || node.nodeName === 'WC-MERMAID' || node.parentNode.isCode; node.isBlank = isBlank(node); node.flankingWhitespace = flankingWhitespace(node, options); - // When true, this node will be rendered as pure Markdown; false indicates it will be rendered using HTML. A value of true can indicate either that the source HTML can be perfectly captured as Markdown, or that the source HTML will be approximated as Markdown by discarding some HTML attributes (options.renderAsPure === true). Note that the value computed below is an initial estimate, which may be updated by a rule's `pureAttributes` property. + // When true, this node will be rendered as pure Markdown; false indicates it + // will be rendered using HTML. A value of true can indicate either that the + // source HTML can be perfectly captured as Markdown, or that the source HTML + // will be approximated as Markdown by discarding some HTML attributes + // (options.renderAsPure === true). Note that the value computed below is an + // initial estimate, which may be updated by a rule's `pureAttributes` + // property. 
node.renderAsPure = options.renderAsPure || node.attributes === undefined || node.attributes.length === 0; - // Given a dict of attributes that an HTML element may contain and still be convertable to pure Markdown, update the `node.renderAsPure` attribute. The keys of the dict define allowable attributes; the values define the value allowed for that key. If the value is `undefined`, then any value is allowed for the given key. + // Given a dict of attributes that an HTML element may contain and still be + // convertable to pure Markdown, update the `node.renderAsPure` attribute. The + // keys of the dict define allowable attributes; the values define the value + // allowed for that key. If the value is `undefined`, then any value is + // allowed for the given key. node.addPureAttributes = (d) => { - // Only perform this check if the node isn't pure and there's something to check. Note that `d.length` is always `undefined` (JavaScript is fun). + // Only perform this check if the node isn't pure and there's something to + // check. Note that `d.length` is always `undefined` (JavaScript is fun). if (!node.renderAsPure && Object.keys(d).length) { - // Check to see how many of the allowed attributes match the actual attributes. + // Check to see how many of the allowed attributes match the actual + // attributes. let allowedLength = 0; for (const [key, value] of Object.entries(d)) { if (key in node.attributes && (value === undefined || node.attributes[key].value === value)) { ++allowedLength; } } - // If the lengths are equal, then every attribute matched with an allowed attribute: this node is representable in pure Markdown. + // If the lengths are equal, then every attribute matched with an allowed + // attribute: this node is representable in pure Markdown. if (node.attributes.length === allowedLength) { node.renderAsPure = true; } } }; - // Provide a means to escape HTML to confirm to Markdown's requirements. 
This happens only inside preformatted code blocks, where `collapseWhitespace` avoids removing newlines. - node.cleanOuterHTML = () => node.outerHTML.replace(/\n/g, ' ').replace(/\r/g, ' '); - // Output the provided string if `node.renderAsPure`; otherwise, output `node.outerHTML`. + // Provide a means to escape HTML to conform to Markdown's requirements: + // inside raw HTML, one + // [end condition](https://spec.commonmark.org/0.31.2/#html-blocks) is a blank + // line (two consecutive newlines). To avoid this, escape newline pairs. Note: + // this is a bit conservative, since some tags end only with a closing tag, + // not on a newline. + node.cleanOuterHTML = () => node.outerHTML.replace(/\n\n/g, '\n ').replace(/\r\r/g, '\r ').replace(/\n\r\n\r/g, '\n\r ').replace(/\r\n\r\n/g, '\r\n '); + // Output the provided string if `node.renderAsPure`; otherwise, output + // `node.outerHTML`. node.ifPure = (str) => node.renderAsPure ? str : node.cleanOuterHTML(); return node } @@ -174,13 +193,100 @@ function isFlankedByWhitespace (side, node, options) { return isFlanked } +/*! + * word-wrap + * + * Copyright (c) 2014-2023, Jon Schlinkert. + * Released under the MIT License. + */ + +function trimEnd(str) { + let lastCharPos = str.length - 1; + let lastChar = str[lastCharPos]; + while(lastChar === ' ' || lastChar === '\t') { + lastChar = str[--lastCharPos]; + } + return str.substring(0, lastCharPos + 1); +} + +function trimTabAndSpaces(str) { + const lines = str.split('\n'); + const trimmedLines = lines.map((line) => trimEnd(line)); + return trimmedLines.join('\n'); +} + +var wordWrap = function(str, options) { + options = options || {}; + if (str == null) { + return str; + } + + var width = options.width || 50; + var indent = (typeof options.indent === 'string') + ? options.indent + : ' '; + + var newline = options.newline || '\n' + indent; + var escape = typeof options.escape === 'function' + ? 
options.escape + : identity; + + var regexString = '.{1,' + width + '}'; + if (options.cut !== true) { + regexString += '([\\s\u200B]+|$)|[^\\s\u200B]+?([\\s\u200B]+|$)'; + } + + var re = new RegExp(regexString, 'g'); + var lines = str.match(re) || []; + var result = indent + lines.map(function(line) { + if (line.slice(-1) === '\n') { + line = line.slice(0, line.length - 1); + } + return escape(line); + }).join(newline); + + if (options.trim === true) { + result = trimTabAndSpaces(result); + } + return result; +}; + +function identity(str) { + return str; +} + +// Determine the approximate left indent. It will be incorrect for list items +// whose numbers are over two digits. +const approxLeftIndent = (node) => { + let leftIndent = 0; + while (node) { + if (node.nodeName === 'BLOCKQUOTE') { + leftIndent += 2; + } else if (node.nodeName === 'UL' || node.nodeName === 'OL') { + leftIndent += 4; + } + node = node.parentNode; + } + return leftIndent +}; + +// Wrap the provided text if so requested by the options. +const wrapContent = (content, node, options) => { + if (!options.wordWrap.length) { + return content + } + const [wordWrapColumn, wordWrapMinWidth] = options.wordWrap; + const wrapWidth = Math.max(wordWrapColumn - approxLeftIndent(node), wordWrapMinWidth); + return wordWrap(content, {width: wrapWidth, indent: '', trim: true}) +}; + var rules = {}; rules.paragraph = { filter: 'p', - replacement: function (content) { - return '\n\n' + content + '\n\n' + replacement: function (content, node, options) { + return '\n\n' + wrapContent(content, node, options) + '\n\n' } }; @@ -196,10 +302,15 @@ rules.heading = { filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'], replacement: function (content, node, options) { + content = wrapContent(content, node, options); var hLevel = Number(node.nodeName.charAt(1)); if (options.headingStyle === 'setext' && hLevel < 3) { - var underline = repeat((hLevel === 1 ? 
'=' : '-'), content.length); + // Split the contents into lines, then find the longest line length. + const splitContent = content.split(/\r\n|\n|\r/); + // From [SO](https://stackoverflow.com/a/43304999/16038919). + const maxLineLength = Math.max(...(splitContent.map(el => el.length))); + var underline = repeat((hLevel === 1 ? '=' : '-'), maxLineLength); return ( '\n\n' + content + '\n' + underline + '\n\n' ) @@ -212,7 +323,8 @@ rules.heading = { rules.blockquote = { filter: 'blockquote', - replacement: function (content) { + replacement: function (content, node, options) { + content = wrapContent(content, node, options); content = content.replace(/^\n+|\n+$/g, ''); content = content.replace(/^/gm, '> '); return '\n\n' + content + '\n\n' @@ -222,7 +334,9 @@ rules.blockquote = { rules.list = { filter: ['ul', 'ol'], pureAttributes: function (node, options) { - // When rendering in faithful mode, check that all children are `
<li>` elements that can be faithfully rendered. If not, this must be rendered as HTML. + // When rendering in faithful mode, check that all children are `
  • ` + // elements that can be faithfully rendered. If not, this must be rendered + // as HTML. if (!options.renderAsPure) { var childrenPure = Array.prototype.reduce.call(node.childNodes, (previousValue, currentValue) => @@ -231,7 +345,8 @@ rules.list = { (new Node(currentValue, options)).renderAsPure, true ); if (!childrenPure) { - // If any of the children must be rendered as HTML, then this node must also be rendered as HTML. + // If any of the children must be rendered as HTML, then this node must + // also be rendered as HTML. node.renderAsPure = false; return } @@ -268,17 +383,52 @@ rules.listItem = { const suffix = '.'; const padding = (digits > spaces ? digits + 1 : spaces + 1) + suffix.length; // increase padding if beyond 99 prefix = (itemNumber + suffix).padEnd(padding); - content = content.replace(/\n/gm, '\n '.padEnd(1 + padding)); + // Indent all non-blank lines. + content = content.replace(/\n(.+)/gm, '\n '.padEnd(1 + padding) + '$1'); } else { prefix = options.bulletListMarker + ' '.padEnd(1 + spaces); - content = content.replace(/\n/gm, '\n '.padEnd(3 + spaces)); // indent + // Indent all non-blank lines. + content = content.replace(/\n(.+)/gm, '\n '.padEnd(3 + spaces) + '$1'); } return ( - prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '') + prefix + content + (node.nextSibling && !content.endsWith('\n\n') ? '\n' : '') ) } }; +// Determine if a code block is pure. It accepts the following structure: +// +// ```HTML +//
<pre> +//   <code>code contents, including newlines</code>
+//   ...then 0 or more of either:
+//   <br>     <-- this is translated to a newline +//   <code>more code</code> +// </pre>
    +// ``` +let codeBlockPureAttributes = (node, options, isFenced) => { + // Check the purity of the child block(s) which contain the code. + node.renderAsPure = options.renderAsPure || (node.childNodes.length > 0 && Array.prototype.reduce.call(node.childNodes, (accumulator, childNode) => { + const cn = new Node(childNode, options); + // All previous siblings are pure and... + return accumulator && ( + // ... it's either a `br` (which cannot have children) ... + (cn.nodeName === 'BR' && cn.attributes.length === 0) || + // ... or a `code` element which has ... + (cn.nodeName === 'CODE' && + // ... no attributes or (for a fenced code block) a class attribute + // containing a language name... + (cn.attributes.length === 0 || (isFenced && cn.attributes.length === 1 && cn.className.match(/language-(\S+)/))) && + // ... only one child... + cn.childNodes.length === 1 && + // ... containing text, ... + cn.firstChild.nodeType === 3 + ) + ) + // ... then this node and its subtree are pure. + }, true)); +}; + rules.indentedCodeBlock = { filter: function (node, options) { return ( @@ -289,14 +439,7 @@ rules.indentedCodeBlock = { ) }, - pureAttributes: function (node, options) { - // Check the purity of the child block(s) which contain the code. - node.renderAsPure = options.renderAsPure || (node.renderAsPure && ( - // There's only one child (the code element), and it's pure. - new Node(node.firstChild, options)).renderAsPure && node.childNodes.length === 1 && - // There's only one child of this code element, and it's text. - node.firstChild.childNodes.length === 1 && node.firstChild.firstChild.nodeType === 3); - }, + pureAttributes: (node, options) => codeBlockPureAttributes(node, options, false), replacement: function (content, node, options) { return ( @@ -317,26 +460,14 @@ rules.fencedCodeBlock = { ) }, - pureAttributes: function (node, options) { - // Check the purity of the child code element. 
- var firstChild = new Node(node.firstChild, options); - var className = firstChild.getAttribute('class') || ''; - var language = (className.match(/language-(\S+)/) || [null, ''])[1]; - // Allow the matched classname as pure Markdown. Compare using the `className` attribute, since the `class` attribute returns an object, not an easily-comparable string. - if (language) { - firstChild.renderAsPure = firstChild.renderAsPure || firstChild.className === `language-${language}`; - } - node.renderAsPure = options.renderAsPure || (node.renderAsPure && - // There's only one child (the code element), and it's pure. - firstChild.renderAsPure && node.childNodes.length === 1 && - // There's only one child of this code element, and it's text. - node.firstChild.childNodes.length === 1 && node.firstChild.firstChild.nodeType === 3); - }, + pureAttributes: (node, options) => codeBlockPureAttributes(node, options, true), replacement: function (content, node, options) { var className = node.firstChild.getAttribute('class') || ''; var language = (className.match(/language-(\S+)/) || [null, ''])[1]; - var code = node.firstChild.textContent; + // In the HTML, combine the text inside `code` tags while translating `br` + // tags to a newline. + var code = Array.prototype.reduce.call(node.childNodes, (accumulator, childNode) => accumulator + (childNode.tagName === 'BR' ? '\n' : childNode.textContent), ''); var fenceChar = options.fence.charAt(0); var fenceSize = 3; @@ -641,7 +772,7 @@ function collapseWhitespace (options) { var isBlock = options.isBlock; var isVoid = options.isVoid; var isPre = options.isPre || function (node) { - return node.nodeName === 'PRE' + return node.nodeName === 'PRE' || node.nodeName === 'WC-MERMAID' }; var renderAsPure = options.renderAsPure; @@ -890,25 +1021,27 @@ var escapes = [ [/^>/g, '\\>'], [/_/g, '\\_'], [/^(\d+)\. /g, '$1\\. 
'], - // Per [section 6.6 of the CommonMark spec](https://spec.commonmark.org/0.30/#raw-html), - // Raw HTML, CommonMark recognizes and passes through HTML-like tags and - // their contents. Therefore, Turndown needs to escape text that would parse - // as an HTML-like tag. This regex recognizes these tags and escapes them by + // Per + // [section 6.6 of the CommonMark spec](https://spec.commonmark.org/0.30/#raw-html), + // Raw HTML, CommonMark recognizes and passes through HTML-like tags and their + // contents. Therefore, Turndown needs to escape text that would parse as an + // HTML-like tag. This regex recognizes these tags and escapes them by // inserting a leading backslash. [new RegExp(HTMLTAG, 'g'), '\\$&'], - // Likewise, [section 4.6 of the CommonMark spec](https://spec.commonmark.org/0.30/#html-blocks), + // Likewise, + // [section 4.6 of the CommonMark spec](https://spec.commonmark.org/0.30/#html-blocks), // HTML blocks, requires the same treatment. // // This regex was copied from `commonmark.js/lib/blocks.js`, the // `reHtmlBlockOpen` variable. We only need regexps for patterns not matched // by the previous pattern, so this doesn't need all expressions there. // - // TODO: this is too aggressive; it should only recognize this pattern at - // the beginning of a line of CommonnMark source; these will recognize the - // pattern at the beginning of any inline or block markup. The approach I - // tried was to put this in `commonmark-rules.js` for the `paragraph` and - // `heading` rules (the only block beginning-of-line rules). However, text - // outside a paragraph/heading doesn't get escaped in this case. + // TODO: this is too aggressive; it should only recognize this pattern at the + // beginning of a line of CommonnMark source; these will recognize the pattern + // at the beginning of any inline or block markup. 
    The approach I tried was to + // put this in `commonmark-rules.js` for the `paragraph` and `heading` rules + // (the only block beginning-of-line rules). However, text outside a + // paragraph/heading doesn't get escaped in this case. [/^<(?:script|pre|textarea|style)(?:\s|>|$)/i, '\\$&'], [/^<[/]?(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?:\s|[/]?[>]|$)/i, '\\$&'] ]; @@ -929,8 +1062,16 @@ function TurndownService (options) { linkReferenceStyle: 'full', br: ' ', preformattedCode: false, - // Should the output be pure (pure Markdown, with no HTML blocks; this discards any HTML input that can't be represented in "pure" Markdown) or faithful (any input HTML that can't be exactly duplicated using Markdwon remains HTML is the resulting output)? This is `false` by default, following the original author's design. + // Should the output be pure (pure Markdown, with no HTML blocks; this + // discards any HTML input that can't be represented in "pure" Markdown) or + // faithful (any input HTML that can't be exactly duplicated using Markdown + // remains HTML in the resulting output)? This is `true` by default, + // following the original author's design. renderAsPure: true, + // An array of \[word wrap column, minimum word wrap width\] indicates that + // the output should be word wrapped based on these parameters; otherwise, + // an empty list indicates no wrapping. + wordWrap: [], blankReplacement: function (content, node) { return node.isBlock ? '\n\n' : '' }, @@ -938,7 +1079,8 @@ function TurndownService (options) { return node.isBlock ? 
'\n\n' + node.outerHTML + '\n\n' : node.outerHTML }, defaultReplacement: function (content, node, options) { - // A hack: for faithful output, always produce the HTML, rather than the content. To get this, tell the node it's impure. + // A hack: for faithful output, always produce the HTML, rather than the + // content. To get this, tell the node it's impure. node.renderAsPure = options.renderAsPure; return node.isBlock ? '\n\n' + node.ifPure(content) + '\n\n' : node.ifPure(content) } @@ -969,6 +1111,40 @@ TurndownService.prototype = { return postProcess.call(this, output) }, + /** + * Like `turndown`, but functions like an iterator, so that the HTML to convert + * is delivered in a sequnce of calls this method, then a single call to `last`. + * @public + * @param {String|HTMLElement} input The string or DOM node to convert + * @returns A Markdown representation of the input + * @type String + */ + + next: function (input) { + if (!canConvert(input)) { + throw new TypeError( + input + ' is not a string, or an element/document/fragment node.' + ) + } + + if (input === '') return '' + + var output = process.call(this, new RootNode(input, this.options)); + return cleanEmptyLines(output) + }, + + /** + * See `next`; this finalizes the Markdown output produced by call to `next`. + * @public + * @param {String|HTMLElement} input The string or DOM node to convert + * @returns A Markdown representation of the input + * @type String + */ + + last: function (input) { + this.turndown(input); + }, + /** * Add one or more plugins * @public @@ -1043,6 +1219,11 @@ TurndownService.prototype = { } }; +// These HTML elements are considered block nodes, as opposed to inline nodes. It's based on the Commonmark spec's selection of [HTML blocks](https://spec.commonmark.org/0.31.2/#html-blocks). 
+const blockNodeNames = new Set([ + 'PRE', 'SCRIPT', 'STYLE', 'TEXTAREA', 'ADDRESS', 'ARTICLE', 'ASIDE', 'BASE', 'BASEFONT', 'BLOCKQUOTE', 'BODY', 'CAPTION', 'CENTER', 'COL', 'COLGROUP', 'DD', 'DETAILS', 'DIALOG', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FOOTER', 'FORM', 'FRAME', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEAD', 'HEADER', 'HR', 'HTML', 'IFRAME', 'LEGEND', 'LI', 'LINK', 'MAIN', 'MENU', 'MENUITEM', 'NAV', 'NOFRAMES', 'OL', 'OPTGROUP', 'OPTION', 'P', 'PARAM', 'SEARCH', 'SECTION', 'SUMMARY', 'TABLE', 'TBODY', 'TD', 'TFOOT', 'TH', 'THEAD', 'TITLE', 'TR', 'TRACK', 'UL' +]); + /** * Reduces a DOM node down to its Markdown string equivalent * @private @@ -1053,35 +1234,57 @@ TurndownService.prototype = { function process (parentNode) { var self = this; - // Note that the root node passed to Turndown isn't translated -- only its children, since the root node is simply a container (a div or body tag) of items to translate. Only the root node's `renderAsPure` attribute is undefined; treat it as pure, since we never translate this node. + const isLi = parentNode.nodeName === 'LI'; + // Note that the root node passed to Turndown isn't translated -- only its + // children, since the root node is simply a container (a div or body tag) of + // items to translate. Only the root node's `renderAsPure` attribute is + // undefined; treat it as pure, since we never translate this node. if (parentNode.renderAsPure || parentNode.renderAsPure === undefined) { - return reduce.call(parentNode.childNodes, function (output, node) { + const output = reduce.call(parentNode.childNodes, function (output, node) { + // `output` consists of [output so far, li accumulator]. For non-li nodes, this node's output is added to the output so far. Otherwise, accumulate content for wrapping. Wrap accumulation rules: accumulate any text and non-block node; wrap the accumulator when on a non-accumulating node. 
node = new Node(node, self.options); var replacement = ''; + const nodeType = node.nodeType; // Is this a text node? - if (node.nodeType === 3) { + if (nodeType === 3) { replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue); // Is this an element node? - } else if (node.nodeType === 1) { + } else if (nodeType === 1) { replacement = replacementForNode.call(self, node); // In faithful mode, return the contents for these special cases. } else if (!self.options.renderAsPure) { - if (node.nodeType === 4) { + if (nodeType === 4) { replacement = ``; - } else if (node.nodeType === 7) { + } else if (nodeType === 7) { replacement = ``; - } else if (node.nodeType === 8) { + } else if (nodeType === 8) { replacement = ``; - } else if (node.nodeType === 10) { + } else if (nodeType === 10) { replacement = ``; + } else { + console.log(`Error: unexpected node type ${nodeType}.`); } } - return join(output, replacement) - }, '') + if (isLi) { + // Is this a non-accumulating node? + if (nodeType > 3 || (nodeType === 1 && blockNodeNames.has(node.nodeName))) { + // This is a non-accumulating node. Wrap the accumulated content, then clear the accumulator. + const wrappedAccumulator = wrapContent(output[1], node, self.options); + return [join(join(wrappedAccumulator, output[0]), replacement), ''] + } else { + // This is an accumulating node, so add this to the accumulator. + return [output[0], join(output[1], replacement)] + } + } else { + return [join(output[0], replacement), ''] + } + }, ['', '']); + return join(output[0], wrapContent(output[1], parentNode, self.options)) } else { - // If the `parentNode` represented itself as raw HTML, that contains all the contents of the child nodes. + // If the `parentNode` represented itself as raw HTML, that contains all the + // contents of the child nodes. 
    return '' } } @@ -1102,9 +1305,14 @@ function postProcess (output) { } }); - return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '') + return cleanEmptyLines(output) } +// Remove leading newlines/tabs and trailing whitespace from the output as a whole (not from each line). This is +// a postprocessing method to call just before returning the converted Markdown +// output. +const cleanEmptyLines = (output) => output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, ''); + /** * Converts an element node to its Markdown equivalent * @private @@ -1121,7 +1329,9 @@ function replacementForNode (node) { if (whitespace.leading || whitespace.trailing) content = content.trim(); return ( whitespace.leading + - // If this node contains impure content, then it must be replaced with HTML. In this case, the `content` doesn't matter, so it's passed as an empty string. + // If this node contains impure content, then it must be replaced with HTML. + // In this case, the `content` doesn't matter, so it's passed as an empty + // string. (node.renderAsPure ? rule.replacement(content, node, this.options) : this.options.defaultReplacement('', node, this.options)) + whitespace.trailing )