Skip to content

Commit e4bed9d

Browse files
authored
Merge pull request #18645 from ckeditor/internal/4006-migrate-markdown-gfm-plugin-to-unifiedjs-ecosystem
Other (markdown-gfm): Migrate to `remark` / `rehype` packages. Closes #18684. MINOR BREAKING CHANGE (markdown-gfm): Migrate from `marked` and `turndown` to `remark` and `rehype`. MINOR BREAKING CHANGE (markdown-gfm): Enable autolinking in Markdown (works only when loading Markdown content into the editor).
2 parents 1baa8c1 + 5d973df commit e4bed9d

File tree

20 files changed

+521
-359
lines changed

20 files changed

+521
-359
lines changed

LICENSE.md

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,33 @@ Where not otherwise indicated, all CKEditor content is authored by CKSource engi
2121
The following libraries are included in CKEditor under the [MIT license](https://opensource.org/licenses/MIT):
2222

2323
* @types/color-convert - Copyright (c) DefinitelyTyped.
24-
* @types/marked - Copyright (c) DefinitelyTyped.
25-
* @types/turndown - Copyright (c) DefinitelyTyped.
2624
* blurhash - Copyright (c) Wolt Enterprises.
2725
* color-convert - Copyright (c) 2011–2016 Heather Arthur <fayearthur@gmail.com>, copyright (c) 2016–2021 Josh Junon <josh@junon.me>.
2826
* color-parse - Copyright (c) 2015 Dmitry Ivanov.
2927
* emojibase-data - Copyright (c) 2017-2019 Miles Johnson.
3028
* es-toolkit - Copyright (c) 2024 Viva Republica, Inc.
3129
* fuzzysort - Copyright (c) 2018 Stephen Kamenar.
3230
* is-emoji-supported - Copyright (c) 2016-2020 Koala Interactive, Inc.
33-
* marked - Copyright (c) 2018+, MarkedJS (https://github.com/markedjs/), Copyright (c) 2011–2018, Christopher Jeffrey (https://github.com/chjj/).
34-
* turndown - Copyright (c) 2017 Dom Christie.
35-
* turndown-plugin-gfm - Copyright (c) 2017 Dom Christie.
3631
* vanilla-colorful - Copyright (c) 2020 Serhii Kulykov <iamkulykov@gmail.com>.
3732
* Regular Expression for URL validation - Copyright (c) 2010-2018 Diego Perini.
33+
* @types/hast - Copyright (c) Microsoft Corporation.
34+
* hast-util-to-html - Copyright (c) Titus Wormer <tituswormer@gmail.com>
35+
* hast-util-to-mdast - Copyright (c) Titus Wormer <tituswormer@gmail.com> and Copyright (c) Seth Vincent <sethvincent@gmail.com>
36+
* hastscript - Copyright (c) Titus Wormer <tituswormer@gmail.com>
37+
* rehype-remark - Copyright (c) Titus Wormer <tituswormer@gmail.com>
38+
* remark-breaks - Copyright (c) 2017 Titus Wormer <tituswormer@gmail.com>
39+
* remark-gfm - Copyright (c) Titus Wormer <tituswormer@gmail.com>
40+
* remark-parse - Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>
41+
* remark-rehype - Copyright (c) Titus Wormer <tituswormer@gmail.com>
42+
* remark-stringify - Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>
43+
* unified - Copyright (c) 2015 Titus Wormer <tituswormer@gmail.com>
44+
* unist-util-visit - Copyright (c) 2015 Titus Wormer <tituswormer@gmail.com>
45+
46+
The following libraries are included in CKEditor under the [ISC license](https://opensource.org/license/isc-license-txt):
47+
48+
* hast-util-from-dom - Copyright (c) Keith McKnight <keith@mcknig.ht>
49+
* rehype-dom-parse - Copyright (c) 2018 Keith McKnight <keith@mcknig.ht>
50+
* rehype-dom-stringify - Copyright (c) 2018 Keith McKnight <keith@mcknig.ht>
3851

3952
Trademarks
4053
----------

packages/ckeditor5-markdown-gfm/LICENSE.md

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,24 @@ Where not otherwise indicated, all CKEditor content is authored by CKSource engi
1818

1919
The following libraries are included in CKEditor under the [MIT license](https://opensource.org/licenses/MIT):
2020

21-
* @types/marked - Copyright (c) DefinitelyTyped.
22-
* @types/turndown - Copyright (c) DefinitelyTyped.
23-
* marked - Copyright (c) 2018+, MarkedJS (https://github.com/markedjs/), Copyright (c) 2011–2018, Christopher Jeffrey (https://github.com/chjj/).
24-
* turndown - Copyright (c) 2017 Dom Christie.
25-
* turndown-plugin-gfm - Copyright (c) 2017 Dom Christie.
21+
* @types/hast - Copyright (c) Microsoft Corporation.
22+
* hast-util-to-html - Copyright (c) Titus Wormer <tituswormer@gmail.com>
23+
* hast-util-to-mdast - Copyright (c) Titus Wormer <tituswormer@gmail.com> and Copyright (c) Seth Vincent <sethvincent@gmail.com>
24+
* hastscript - Copyright (c) Titus Wormer <tituswormer@gmail.com>
25+
* rehype-remark - Copyright (c) Titus Wormer <tituswormer@gmail.com>
26+
* remark-breaks - Copyright (c) 2017 Titus Wormer <tituswormer@gmail.com>
27+
* remark-gfm - Copyright (c) Titus Wormer <tituswormer@gmail.com>
28+
* remark-parse - Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>
29+
* remark-rehype - Copyright (c) Titus Wormer <tituswormer@gmail.com>
30+
* remark-stringify - Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>
31+
* unified - Copyright (c) 2015 Titus Wormer <tituswormer@gmail.com>
32+
* unist-util-visit - Copyright (c) 2015 Titus Wormer <tituswormer@gmail.com>
33+
34+
The following libraries are included in CKEditor under the [ISC license](https://opensource.org/license/isc-license-txt):
35+
36+
* hast-util-from-dom - Copyright (c) Keith McKnight <keith@mcknig.ht>
37+
* rehype-dom-parse - Copyright (c) 2018 Keith McKnight <keith@mcknig.ht>
38+
* rehype-dom-stringify - Copyright (c) 2018 Keith McKnight <keith@mcknig.ht>
2639

2740
Trademarks
2841
----------

packages/ckeditor5-markdown-gfm/docs/features/markdown.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ While the Markdown plugin is stable and ready to use, some issues are still bein
9393

9494
* Pasting Markdown-formatted content does not automatically convert the pasted syntax markers into properly formatted content. GitHub issues: [#2321](https://github.com/ckeditor/ckeditor5/issues/2321), [#2322](https://github.com/ckeditor/ckeditor5/issues/2322).
9595
* The Markdown code generated with the Markdown output feature will not properly render {@link features/tables#nesting-tables nested tables}. GitHub issue: [#9475](https://github.com/ckeditor/ckeditor5/issues/9475).
96+
* Code blocks inside to-do lists will not render properly. GitHub issue: [#18754](https://github.com/ckeditor/ckeditor5/issues/18754).
9697

9798
## Related features
9899

packages/ckeditor5-markdown-gfm/package.json

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,22 @@
2121
"@ckeditor/ckeditor5-clipboard": "45.2.1",
2222
"@ckeditor/ckeditor5-core": "45.2.1",
2323
"@ckeditor/ckeditor5-engine": "45.2.1",
24-
"@types/marked": "4.3.2",
25-
"@types/turndown": "5.0.5",
24+
"@types/hast": "3.0.4",
2625
"ckeditor5": "45.2.1",
27-
"marked": "4.0.12",
28-
"turndown": "7.2.0",
29-
"turndown-plugin-gfm": "1.0.2"
26+
"hast-util-from-dom": "5.0.1",
27+
"hast-util-to-html": "9.0.5",
28+
"hast-util-to-mdast": "10.1.2",
29+
"hastscript": "9.0.1",
30+
"rehype-dom-parse": "5.0.2",
31+
"rehype-dom-stringify": "4.0.2",
32+
"rehype-remark": "10.0.1",
33+
"remark-breaks": "4.0.0",
34+
"remark-gfm": "4.0.1",
35+
"remark-parse": "11.0.0",
36+
"remark-rehype": "11.1.2",
37+
"remark-stringify": "11.0.0",
38+
"unified": "11.0.5",
39+
"unist-util-visit": "5.0.0"
3040
},
3141
"devDependencies": {
3242
"@ckeditor/ckeditor5-autoformat": "45.2.1",
@@ -73,10 +83,7 @@
7383
"CHANGELOG.md"
7484
],
7585
"depcheckIgnore": [
76-
"collapse-whitespace",
77-
"jsdom",
78-
"void-elements",
79-
"block-elements"
86+
"hast"
8087
],
8188
"scripts": {
8289
"dll:build": "webpack",

packages/ckeditor5-markdown-gfm/src/gfmdataprocessor.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ export class MarkdownGfmDataProcessor implements DataProcessor {
5757
* @param element The element name to be kept.
5858
*/
5959
public keepHtml( element: keyof HTMLElementTagNameMap ): void {
60-
this._html2markdown.keep( [ element ] );
60+
this._html2markdown.keep( element );
6161
}
6262

6363
/**

packages/ckeditor5-markdown-gfm/src/html2markdown/html2markdown.ts

Lines changed: 83 additions & 145 deletions
Original file line numberDiff line numberDiff line change
@@ -7,166 +7,104 @@
77
* @module markdown-gfm/html2markdown/html2markdown
88
*/
99

10-
import Turndown from 'turndown';
10+
import { unified, type Plugin } from 'unified';
11+
import rehypeParse from 'rehype-dom-parse';
12+
import rehypeRemark from 'rehype-remark';
13+
import remarkBreaks from 'remark-breaks';
14+
import remarkGfm from 'remark-gfm';
15+
import remarkStringify from 'remark-stringify';
16+
import { visit } from 'unist-util-visit';
17+
import { h } from 'hastscript';
18+
import { toHtml } from 'hast-util-to-html';
19+
import type { Handle, State } from 'hast-util-to-mdast';
20+
import type { Element, Node, Root } from 'hast';
1121

12-
// There are no available types for the 'turndown-plugin-gfm' module and it's not worth generating them on our own.
13-
/* eslint-disable @typescript-eslint/ban-ts-comment */
14-
// @ts-ignore
15-
import { gfm } from 'turndown-plugin-gfm';
16-
17-
const autolinkRegex = /* #__PURE__ */ new RegExp(
18-
// Prefix.
19-
/\b(?:(?:https?|ftp):\/\/|www\.)/.source +
20-
21-
// Domain name.
22-
/(?![-_])(?:[-_a-z0-9\u00a1-\uffff]{1,63}\.)+(?:[a-z\u00a1-\uffff]{2,63})/.source +
23-
24-
// The rest.
25-
/(?:[^\s<>]*)/.source,
26-
'gi'
27-
);
28-
29-
class UpdatedTurndown extends Turndown {
30-
public override escape( string: string ): string {
31-
const originalEscape = super.escape;
32-
33-
function escape( string: string ): string {
34-
string = originalEscape( string );
35-
36-
// Escape "<".
37-
string = string.replace( /</g, '\\<' );
38-
39-
return string;
40-
}
41-
42-
// Urls should not be escaped. Our strategy is using a regex to find them and escape everything
43-
// which is out of the matches parts.
44-
45-
let escaped = '';
46-
let lastLinkEnd = 0;
47-
48-
for ( const match of this._matchAutolink( string ) ) {
49-
const index = match.index!;
50-
51-
// Append the substring between the last match and the current one (if anything).
52-
if ( index > lastLinkEnd ) {
53-
escaped += escape( string.substring( lastLinkEnd, index ) );
54-
}
55-
56-
const matchedURL = match[ 0 ];
57-
58-
escaped += matchedURL;
59-
60-
lastLinkEnd = index + matchedURL.length;
61-
}
62-
63-
// Add text after the last link or at the string start if no matches.
64-
if ( lastLinkEnd < string.length ) {
65-
escaped += escape( string.substring( lastLinkEnd, string.length ) );
66-
}
22+
export class MarkdownGfmHtmlToMd {
23+
private _processor: any;
24+
private _keepRawTags: Array<string> = [];
6725

68-
return escaped;
26+
constructor() {
27+
this._buildProcessor();
6928
}
7029

71-
/**
72-
* Trimming end of link.
73-
* https://github.github.com/gfm/#autolinks-extension-
74-
*/
75-
private* _matchAutolink( string: string ) {
76-
for ( const match of string.matchAll( autolinkRegex ) ) {
77-
const matched = match[ 0 ];
78-
const length = this._autolinkFindEnd( matched );
79-
80-
yield Object.assign(
81-
[ matched.substring( 0, length ) ],
82-
{ index: match.index }
83-
);
30+
public keep( tagName: string ): void {
31+
this._keepRawTags.push( tagName.toLowerCase() );
32+
this._buildProcessor();
33+
}
8434

85-
// We could adjust regex.lastIndex but it's not needed because what we skipped is for sure not a valid URL.
86-
}
35+
public parse( html: string ): string {
36+
return this._processor!
37+
.processSync( html )
38+
.toString()
39+
.trim();
8740
}
8841

8942
/**
90-
* Returns the new length of the link (after it would trim trailing characters).
43+
* Returns handlers for raw HTML tags that should be kept in the Markdown output.
9144
*/
92-
private _autolinkFindEnd( string: string ) {
93-
let length = string.length;
94-
95-
while ( length > 0 ) {
96-
const char = string[ length - 1 ];
97-
98-
if ( '?!.,:*_~\'"'.includes( char ) ) {
99-
length--;
100-
} else if ( char == ')' ) {
101-
let openBrackets = 0;
102-
103-
for ( let i = 0; i < length; i++ ) {
104-
if ( string[ i ] == '(' ) {
105-
openBrackets++;
106-
} else if ( string[ i ] == ')' ) {
107-
openBrackets--;
108-
}
109-
}
110-
111-
// If there are fewer opening brackets than closing ones, we should remove a closing bracket.
112-
if ( openBrackets < 0 ) {
113-
length--;
114-
} else {
115-
break;
116-
}
117-
} else {
118-
break;
119-
}
120-
}
45+
private _getRawTagsHandlers(): Record<string, Handle> {
46+
return this._keepRawTags.reduce( ( handlers: Record<string, Handle>, tagName: string ) => {
47+
handlers[ tagName ] = ( state: State, node: Element ): any => {
48+
const tag = toHtml( h( node.tagName, node.properties ), {
49+
allowDangerousHtml: true,
50+
closeSelfClosing: true
51+
} );
52+
53+
const endOfOpeningTagIndex = tag.indexOf( '>' );
54+
const openingTag = tag.slice( 0, endOfOpeningTagIndex + 1 );
55+
const closingTag = tag.slice( endOfOpeningTagIndex + 1 );
56+
57+
return [
58+
{ type: 'html', value: openingTag },
59+
...state.all( node ),
60+
{ type: 'html', value: closingTag }
61+
];
62+
};
63+
return handlers;
64+
}, {} as Record<string, Handle> );
65+
}
12166

122-
return length;
67+
private _buildProcessor() {
68+
this._processor = unified()
69+
// Parse HTML to an abstract syntax tree (AST).
70+
.use( rehypeParse )
71+
// Removes `<label>` element from TODO lists.
72+
.use( removeLabelFromCheckboxes )
73+
// Turns HTML syntax tree into Markdown syntax tree.
74+
.use( rehypeRemark, {
75+
// Keeps allowed HTML tags.
76+
handlers: this._getRawTagsHandlers()
77+
} )
78+
// Adds support for GitHub Flavored Markdown (GFM).
79+
.use( remarkGfm, {
80+
singleTilde: true
81+
} )
82+
// Replaces line breaks with `<br>` tags.
83+
.use( remarkBreaks )
84+
// Serializes Markdown syntax tree to Markdown string.
85+
.use( remarkStringify, {
86+
resourceLink: true,
87+
emphasis: '_',
88+
rule: '-',
89+
handlers: {
90+
break: () => '\n'
91+
},
92+
unsafe: [
93+
{ character: '<' }
94+
]
95+
} );
12396
}
12497
}
12598

12699
/**
127-
* This is a helper class used by the {@link module:markdown-gfm/markdown Markdown feature} to convert HTML to Markdown.
100+
* Removes `<label>` element from TODO lists, so that `<input>` and `text` are direct children of `<li>`.
128101
*/
129-
export class MarkdownGfmHtmlToMd {
130-
private _parser: UpdatedTurndown;
131-
132-
constructor() {
133-
this._parser = this._createParser();
134-
}
135-
136-
public parse( html: string ): string {
137-
return this._parser.turndown( html );
138-
}
139-
140-
public keep( elements: Turndown.Filter ): void {
141-
this._parser.keep( elements );
142-
}
143-
144-
private _createParser(): UpdatedTurndown {
145-
const parser = new UpdatedTurndown( {
146-
codeBlockStyle: 'fenced',
147-
hr: '---',
148-
headingStyle: 'atx'
149-
} );
150-
151-
parser.use( [
152-
gfm,
153-
this._todoList
154-
] );
155-
156-
return parser;
157-
}
158-
159-
// This is a copy of the original taskListItems rule from turndown-plugin-gfm, with minor changes.
160-
private _todoList( turndown: UpdatedTurndown ): void {
161-
turndown.addRule( 'taskListItems', {
162-
filter( node: any ) {
163-
return node.type === 'checkbox' &&
164-
// Changes here as CKEditor outputs a deeper structure.
165-
( node.parentNode.nodeName === 'LI' || node.parentNode.parentNode.nodeName === 'LI' );
166-
},
167-
replacement( content: any, node: any ) {
168-
return ( node.checked ? '[x]' : '[ ]' ) + ' ';
102+
function removeLabelFromCheckboxes(): ReturnType<Plugin> {
103+
return function( tree: Node ): void {
104+
visit( tree, 'element', ( node: Element, index: number | null, parent: Root | Element ) => {
105+
if ( index !== null && node.tagName === 'label' && parent.type === 'element' && parent.tagName === 'li' ) {
106+
parent.children.splice( index, 1, ...node.children );
169107
}
170108
} );
171-
}
109+
};
172110
}

0 commit comments

Comments
 (0)