Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
d1cda06
Other (markdown-gfm): Migrate to remark / rehype packages.
filipsobol Jun 6, 2025
5159434
Use strict dependency versions and replace `remark-gfm` with `remark-…
filipsobol Jun 6, 2025
78c2025
Fix the `keep` function in `html2markdown`.
filipsobol Jun 6, 2025
f382e31
Improve link handling
filipsobol Jun 6, 2025
0121651
Fix some failing tests
filipsobol Jun 6, 2025
b306b15
Restore original handling for line breaks and fix few failing tests
filipsobol Jun 11, 2025
f5444d7
Fix handling of editor's Todo lists
filipsobol Jun 11, 2025
fd6cea0
Enable autolinking
filipsobol Jun 12, 2025
2ffa4d0
Update tests
filipsobol Jun 12, 2025
f87e535
Add custom DOM-based plugin to replace `rehype-raw`.
filipsobol Jun 16, 2025
606c4fb
Merge branch 'master' into internal/4006-migrate-markdown-gfm-plugin-…
filipsobol Jun 16, 2025
88c3463
Update tests and fix type error
filipsobol Jun 16, 2025
1508d11
Add missing dependencies
filipsobol Jun 16, 2025
6b26925
Ignore coverage for one `if` statement and improve typings
filipsobol Jun 16, 2025
4cdb7bd
Improve types
filipsobol Jun 17, 2025
da32b49
Improve compatibility with To-do lists
filipsobol Jun 17, 2025
46be449
Escape `<` characters in `text` nodes
filipsobol Jun 17, 2025
dbcbf82
Update LICENSE.md
filipsobol Jun 17, 2025
fa0ed89
Fix todo lists
filipsobol Jun 23, 2025
d5db33f
Add bug related to a code block inside to-do lists to a list of known…
filipsobol Jun 23, 2025
42ad7be
Add tests
filipsobol Jun 23, 2025
e82ee39
Update main LICENSE.md
filipsobol Jun 23, 2025
0fcd2d6
Cover more cases of HTML inside Markdown
filipsobol Jun 24, 2025
34c11a3
Update manual test to allow testing allowed HTML tags
filipsobol Jun 24, 2025
9644127
Improve manual test
filipsobol Jun 24, 2025
c69e699
Merge branch 'master' into internal/4006-migrate-markdown-gfm-plugin-…
filipsobol Jun 24, 2025
38f2dbd
Merge branch 'master' into internal/4006-migrate-markdown-gfm-plugin-…
filipsobol Jun 25, 2025
d3de5e2
Replace `hast` with `@types/hast`
filipsobol Jun 26, 2025
5d973df
Update `LICENSE.md` files
filipsobol Jun 26, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions LICENSE.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,33 @@ Where not otherwise indicated, all CKEditor content is authored by CKSource engi
The following libraries are included in CKEditor under the [MIT license](https://opensource.org/licenses/MIT):

* @types/color-convert - Copyright (c) DefinitelyTyped.
* @types/marked - Copyright (c) DefinitelyTyped.
* @types/turndown - Copyright (c) DefinitelyTyped.
* blurhash - Copyright (c) Wolt Enterprises.
* color-convert - Copyright (c) 2011–2016 Heather Arthur <fayearthur@gmail.com>, copyright (c) 2016–2021 Josh Junon <josh@junon.me>.
* color-parse - Copyright (c) 2015 Dmitry Ivanov.
* emojibase-data - Copyright (c) 2017-2019 Miles Johnson.
* es-toolkit - Copyright (c) 2024 Viva Republica, Inc.
* fuzzysort - Copyright (c) 2018 Stephen Kamenar.
* is-emoji-supported - Copyright (c) 2016-2020 Koala Interactive, Inc.
* marked - Copyright (c) 2018+, MarkedJS (https://github.com/markedjs/), Copyright (c) 2011–2018, Christopher Jeffrey (https://github.com/chjj/).
* turndown - Copyright (c) 2017 Dom Christie.
* turndown-plugin-gfm - Copyright (c) 2017 Dom Christie.
* vanilla-colorful - Copyright (c) 2020 Serhii Kulykov <iamkulykov@gmail.com>.
* Regular Expression for URL validation - Copyright (c) 2010-2018 Diego Perini.
* @types/hast - Copyright (c) Microsoft Corporation.
* hast-util-to-html - Copyright (c) Titus Wormer <tituswormer@gmail.com>
* hast-util-to-mdast - Copyright (c) Titus Wormer <tituswormer@gmail.com> and Copyright (c) Seth Vincent <sethvincent@gmail.com>
* hastscript - Copyright (c) Titus Wormer <tituswormer@gmail.com>
* rehype-remark - Copyright (c) Titus Wormer <tituswormer@gmail.com>
* remark-breaks - Copyright (c) 2017 Titus Wormer <tituswormer@gmail.com>
* remark-gfm - Copyright (c) Titus Wormer <tituswormer@gmail.com>
* remark-parse - Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>
* remark-rehype - Copyright (c) Titus Wormer <tituswormer@gmail.com>
* remark-stringify - Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>
* unified - Copyright (c) 2015 Titus Wormer <tituswormer@gmail.com>
* unist-util-visit - Copyright (c) 2015 Titus Wormer <tituswormer@gmail.com>

The following libraries are included in CKEditor under the [ISC license](https://opensource.org/license/isc-license-txt):

* hast-util-from-dom - Copyright (c) Keith McKnight <keith@mcknig.ht>
* rehype-dom-parse - Copyright (c) 2018 Keith McKnight <keith@mcknig.ht>
* rehype-dom-stringify - Copyright (c) 2018 Keith McKnight <keith@mcknig.ht>

Trademarks
----------
Expand Down
23 changes: 18 additions & 5 deletions packages/ckeditor5-markdown-gfm/LICENSE.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,24 @@ Where not otherwise indicated, all CKEditor content is authored by CKSource engi

The following libraries are included in CKEditor under the [MIT license](https://opensource.org/licenses/MIT):

* @types/marked - Copyright (c) DefinitelyTyped.
* @types/turndown - Copyright (c) DefinitelyTyped.
* marked - Copyright (c) 2018+, MarkedJS (https://github.com/markedjs/), Copyright (c) 2011–2018, Christopher Jeffrey (https://github.com/chjj/).
* turndown - Copyright (c) 2017 Dom Christie.
* turndown-plugin-gfm - Copyright (c) 2017 Dom Christie.
* @types/hast - Copyright (c) Microsoft Corporation.
* hast-util-to-html - Copyright (c) Titus Wormer <tituswormer@gmail.com>
* hast-util-to-mdast - Copyright (c) Titus Wormer <tituswormer@gmail.com> and Copyright (c) Seth Vincent <sethvincent@gmail.com>
* hastscript - Copyright (c) Titus Wormer <tituswormer@gmail.com>
* rehype-remark - Copyright (c) Titus Wormer <tituswormer@gmail.com>
* remark-breaks - Copyright (c) 2017 Titus Wormer <tituswormer@gmail.com>
* remark-gfm - Copyright (c) Titus Wormer <tituswormer@gmail.com>
* remark-parse - Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>
* remark-rehype - Copyright (c) Titus Wormer <tituswormer@gmail.com>
* remark-stringify - Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>
* unified - Copyright (c) 2015 Titus Wormer <tituswormer@gmail.com>
* unist-util-visit - Copyright (c) 2015 Titus Wormer <tituswormer@gmail.com>

The following libraries are included in CKEditor under the [ISC license](https://opensource.org/license/isc-license-txt):

* hast-util-from-dom - Copyright (c) Keith McKnight <keith@mcknig.ht>
* rehype-dom-parse - Copyright (c) 2018 Keith McKnight <keith@mcknig.ht>
* rehype-dom-stringify - Copyright (c) 2018 Keith McKnight <keith@mcknig.ht>

Trademarks
----------
Expand Down
1 change: 1 addition & 0 deletions packages/ckeditor5-markdown-gfm/docs/features/markdown.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ While the Markdown plugin is stable and ready to use, some issues are still bein

* Pasting Markdown-formatted content does not automatically convert the pasted syntax markers into properly formatted content. GitHub issues: [#2321](https://github.com/ckeditor/ckeditor5/issues/2321), [#2322](https://github.com/ckeditor/ckeditor5/issues/2322).
* The Markdown code generated with the Markdown output feature will not properly render {@link features/tables#nesting-tables nested tables}. GitHub issue: [#9475](https://github.com/ckeditor/ckeditor5/issues/9475).
* Code blocks inside To-do lists will not properly render. GitHub issue: [#18754](https://github.com/ckeditor/ckeditor5/issues/18754).

## Related features

Expand Down
25 changes: 16 additions & 9 deletions packages/ckeditor5-markdown-gfm/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,22 @@
"@ckeditor/ckeditor5-clipboard": "45.2.1",
"@ckeditor/ckeditor5-core": "45.2.1",
"@ckeditor/ckeditor5-engine": "45.2.1",
"@types/marked": "4.3.2",
"@types/turndown": "5.0.5",
"@types/hast": "3.0.4",
"ckeditor5": "45.2.1",
"marked": "4.0.12",
"turndown": "7.2.0",
"turndown-plugin-gfm": "1.0.2"
"hast-util-from-dom": "5.0.1",
"hast-util-to-html": "9.0.5",
"hast-util-to-mdast": "10.1.2",
"hastscript": "9.0.1",
"rehype-dom-parse": "5.0.2",
"rehype-dom-stringify": "4.0.2",
"rehype-remark": "10.0.1",
"remark-breaks": "4.0.0",
"remark-gfm": "4.0.1",
"remark-parse": "11.0.0",
"remark-rehype": "11.1.2",
"remark-stringify": "11.0.0",
"unified": "11.0.5",
"unist-util-visit": "5.0.0"
},
"devDependencies": {
"@ckeditor/ckeditor5-autoformat": "45.2.1",
Expand Down Expand Up @@ -73,10 +83,7 @@
"CHANGELOG.md"
],
"depcheckIgnore": [
"collapse-whitespace",
"jsdom",
"void-elements",
"block-elements"
"hast"
],
"scripts": {
"dll:build": "webpack",
Expand Down
2 changes: 1 addition & 1 deletion packages/ckeditor5-markdown-gfm/src/gfmdataprocessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ export class MarkdownGfmDataProcessor implements DataProcessor {
* @param element The element name to be kept.
*/
public keepHtml( element: keyof HTMLElementTagNameMap ): void {
this._html2markdown.keep( [ element ] );
this._html2markdown.keep( element );
}

/**
Expand Down
228 changes: 83 additions & 145 deletions packages/ckeditor5-markdown-gfm/src/html2markdown/html2markdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,166 +7,104 @@
* @module markdown-gfm/html2markdown/html2markdown
*/

import Turndown from 'turndown';
import { unified, type Plugin } from 'unified';
import rehypeParse from 'rehype-dom-parse';
import rehypeRemark from 'rehype-remark';
import remarkBreaks from 'remark-breaks';
import remarkGfm from 'remark-gfm';
import remarkStringify from 'remark-stringify';
import { visit } from 'unist-util-visit';
import { h } from 'hastscript';
import { toHtml } from 'hast-util-to-html';
import type { Handle, State } from 'hast-util-to-mdast';
import type { Element, Node, Root } from 'hast';

// There are no available types for the 'turndown-plugin-gfm' module and it is not worth generating them on our own.
/* eslint-disable @typescript-eslint/ban-ts-comment */
// @ts-ignore
import { gfm } from 'turndown-plugin-gfm';

/**
 * Matches URL-like substrings (autolinks) in plain text: a scheme or `www.` prefix,
 * followed by a dotted domain name and any trailing non-whitespace characters other than `<` and `>`.
 *
 * The expression is global and case-insensitive.
 */
const autolinkRegex = /* #__PURE__ */ new RegExp(
	[
		// Prefix: `http://`, `https://`, `ftp://` or `www.`, starting at a word boundary.
		/\b(?:(?:https?|ftp):\/\/|www\.)/,

		// Domain name: one or more dot-terminated labels (not starting with `-` or `_`) and a top-level label.
		/(?![-_])(?:[-_a-z0-9\u00a1-\uffff]{1,63}\.)+(?:[a-z\u00a1-\uffff]{2,63})/,

		// The rest: everything up to the next whitespace, `<` or `>`.
		/(?:[^\s<>]*)/
	].map( part => part.source ).join( '' ),
	'gi'
);

class UpdatedTurndown extends Turndown {
public override escape( string: string ): string {
const originalEscape = super.escape;

function escape( string: string ): string {
string = originalEscape( string );

// Escape "<".
string = string.replace( /</g, '\\<' );

return string;
}

// URLs should not be escaped. Our strategy is to use a regex to find them and escape everything
// that lies outside of the matched parts.

let escaped = '';
let lastLinkEnd = 0;

for ( const match of this._matchAutolink( string ) ) {
const index = match.index!;

// Append the substring between the last match and the current one (if anything).
if ( index > lastLinkEnd ) {
escaped += escape( string.substring( lastLinkEnd, index ) );
}

const matchedURL = match[ 0 ];

escaped += matchedURL;

lastLinkEnd = index + matchedURL.length;
}

// Add text after the last link or at the string start if no matches.
if ( lastLinkEnd < string.length ) {
escaped += escape( string.substring( lastLinkEnd, string.length ) );
}
export class MarkdownGfmHtmlToMd {
private _processor: any;
private _keepRawTags: Array<string> = [];

return escaped;
constructor() {
this._buildProcessor();
}

/**
* Trimming end of link.
* https://github.github.com/gfm/#autolinks-extension-
*/
private* _matchAutolink( string: string ) {
for ( const match of string.matchAll( autolinkRegex ) ) {
const matched = match[ 0 ];
const length = this._autolinkFindEnd( matched );

yield Object.assign(
[ matched.substring( 0, length ) ],
{ index: match.index }
);
/**
 * Registers an HTML tag that should be preserved as raw HTML in the Markdown output.
 *
 * The tag name is stored lower-cased. Registering the same tag again is a no-op, so the
 * list of kept tags does not grow and the processor is not rebuilt needlessly.
 *
 * @param tagName The name of the element to keep.
 */
public keep( tagName: string ): void {
	const normalizedTagName = tagName.toLowerCase();

	if ( this._keepRawTags.includes( normalizedTagName ) ) {
		return;
	}

	this._keepRawTags.push( normalizedTagName );

	// The handlers are passed to the processor when it is built, so it has to be rebuilt to pick up the new tag.
	this._buildProcessor();
}

// We could adjust regex.lastIndex but it's not needed because what we skipped is for sure not a valid URL.
}
/**
 * Converts the given HTML string to Markdown using the current processor.
 *
 * @param html The HTML string to convert.
 * @returns The resulting Markdown with leading and trailing whitespace removed.
 */
public parse( html: string ): string {
	const markdown = this._processor.processSync( html ).toString();

	return markdown.trim();
}

/**
* Returns the new length of the link (after it would trim trailing characters).
* Returns handlers for raw HTML tags that should be kept in the Markdown output.
*/
private _autolinkFindEnd( string: string ) {
let length = string.length;

while ( length > 0 ) {
const char = string[ length - 1 ];

if ( '?!.,:*_~\'"'.includes( char ) ) {
length--;
} else if ( char == ')' ) {
let openBrackets = 0;

for ( let i = 0; i < length; i++ ) {
if ( string[ i ] == '(' ) {
openBrackets++;
} else if ( string[ i ] == ')' ) {
openBrackets--;
}
}

// If there are fewer opening brackets than closing ones, we should remove a closing bracket.
if ( openBrackets < 0 ) {
length--;
} else {
break;
}
} else {
break;
}
}
/**
 * Builds the `hast-util-to-mdast` handlers for all tags registered in `_keepRawTags`.
 *
 * Each handler emits the element's opening tag as a raw `html` node, followed by the
 * converted children, followed by the closing tag as another raw `html` node.
 *
 * @returns A map from tag name to its handler.
 */
private _getRawTagsHandlers(): Record<string, Handle> {
	const handlers: Record<string, Handle> = {};

	for ( const tagName of this._keepRawTags ) {
		handlers[ tagName ] = ( state: State, node: Element ): any => {
			// Serialize the element without its children to obtain just the opening and closing tags.
			const tag = toHtml( h( node.tagName, node.properties ), {
				allowDangerousHtml: true,
				closeSelfClosing: true
			} );

			// Split at the trailing closing tag rather than at the first `>`: a `>` may legally appear
			// unescaped inside a quoted attribute value, which would cut the opening tag in half.
			// Elements serialized without a closing tag (e.g. `<br />`) are kept whole as the opening part.
			const expectedClosingTag = `</${ node.tagName.toLowerCase() }>`;
			const hasClosingTag = tag.toLowerCase().endsWith( expectedClosingTag );
			const splitIndex = hasClosingTag ? tag.length - expectedClosingTag.length : tag.length;

			return [
				{ type: 'html', value: tag.slice( 0, splitIndex ) },
				...state.all( node ),
				{ type: 'html', value: tag.slice( splitIndex ) }
			];
		};
	}

	return handlers;
}

return length;
/**
 * (Re)creates the unified processor that converts an HTML string into a Markdown string
 * and stores it in `_processor`.
 *
 * The raw-tag handlers are read from `_getRawTagsHandlers()` at build time, so this method
 * has to be called again whenever the list of kept tags changes.
 */
private _buildProcessor() {
this._processor = unified()
// Parses HTML to an abstract syntax tree (AST).
.use( rehypeParse )
// Removes `<label>` elements from to-do lists.
.use( removeLabelFromCheckboxes )
// Turns the HTML syntax tree into a Markdown syntax tree.
.use( rehypeRemark, {
// Keeps the allowed HTML tags as raw HTML.
handlers: this._getRawTagsHandlers()
} )
// Adds support for GitHub Flavored Markdown (GFM).
// NOTE(review): `singleTilde` presumably controls single-tilde strikethrough — confirm against the remark-gfm docs.
.use( remarkGfm, {
singleTilde: true
} )
// Adds the `remark-breaks` plugin for line break handling.
// NOTE(review): presumably turns soft line endings into `break` nodes — confirm against the remark-breaks docs.
.use( remarkBreaks )
// Serializes the Markdown syntax tree to a Markdown string.
.use( remarkStringify, {
resourceLink: true,
emphasis: '_',
rule: '-',
handlers: {
// Serializes `break` nodes as a plain newline.
break: () => '\n'
},
// Marks `<` as an unsafe character.
// NOTE(review): presumably so that it gets escaped in the output — confirm against the remark-stringify docs.
unsafe: [
{ character: '<' }
]
} );
}
}

/**
* This is a helper class used by the {@link module:markdown-gfm/markdown Markdown feature} to convert HTML to Markdown.
* Removes `<label>` element from TODO lists, so that `<input>` and `text` are direct children of `<li>`.
*/
export class MarkdownGfmHtmlToMd {
private _parser: UpdatedTurndown;

constructor() {
this._parser = this._createParser();
}

public parse( html: string ): string {
return this._parser.turndown( html );
}

public keep( elements: Turndown.Filter ): void {
this._parser.keep( elements );
}

private _createParser(): UpdatedTurndown {
const parser = new UpdatedTurndown( {
codeBlockStyle: 'fenced',
hr: '---',
headingStyle: 'atx'
} );

parser.use( [
gfm,
this._todoList
] );

return parser;
}

// This is a copy of the original taskListItems rule from turndown-plugin-gfm, with minor changes.
private _todoList( turndown: UpdatedTurndown ): void {
turndown.addRule( 'taskListItems', {
filter( node: any ) {
return node.type === 'checkbox' &&
// Changes here as CKEditor outputs a deeper structure.
( node.parentNode.nodeName === 'LI' || node.parentNode.parentNode.nodeName === 'LI' );
},
replacement( content: any, node: any ) {
return ( node.checked ? '[x]' : '[ ]' ) + ' ';
/**
 * Plugin factory that returns a tree transformer unwrapping `<label>` elements which are
 * direct children of `<li>` elements: each such label is replaced in place by its own children.
 */
function removeLabelFromCheckboxes(): ReturnType<Plugin> {
return function( tree: Node ): void {
// Walk every `element` node of the tree.
visit( tree, 'element', ( node: Element, index: number | null, parent: Root | Element ) => {
// Only act on a `<label>` whose position is known and whose parent is an `<li>` element.
if ( index !== null && node.tagName === 'label' && parent.type === 'element' && parent.tagName === 'li' ) {
// Replace the label with its children at the same position in the parent.
// NOTE(review): the parent's children are mutated during traversal and no index is returned
// to `visit` — confirm the traversal handles this as intended.
parent.children.splice( index, 1, ...node.children );
}
} );
}
};
}
Loading