ckeditor
diff --git a/‎LICENSE.md‎
Lines changed: 18 additions & 5 deletions b/‎LICENSE.md‎
Lines changed: 18 additions & 5 deletions
diff --git a/‎packages/ckeditor5-markdown-gfm/LICENSE.md‎
Lines changed: 18 additions & 5 deletions b/‎packages/ckeditor5-markdown-gfm/LICENSE.md‎
Lines changed: 18 additions & 5 deletions
diff --git a/‎packages/ckeditor5-markdown-gfm/docs/features/markdown.md‎
Lines changed: 1 addition & 0 deletions b/‎packages/ckeditor5-markdown-gfm/docs/features/markdown.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎packages/ckeditor5-markdown-gfm/package.json‎
Lines changed: 16 additions & 9 deletions b/‎packages/ckeditor5-markdown-gfm/package.json‎
Lines changed: 16 additions & 9 deletions
diff --git a/‎packages/ckeditor5-markdown-gfm/src/gfmdataprocessor.ts‎
Lines changed: 1 addition & 1 deletion b/‎packages/ckeditor5-markdown-gfm/src/gfmdataprocessor.ts‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎packages/ckeditor5-markdown-gfm/src/html2markdown/html2markdown.ts‎
Lines changed: 83 additions & 145 deletions b/‎packages/ckeditor5-markdown-gfm/src/html2markdown/html2markdown.ts‎
Lines changed: 83 additions & 145 deletions
@@ -21,20 +21,33 @@ Where not otherwise indicated, all CKEditor content is authored by CKSource engi
 The following libraries are included in CKEditor under the [MIT license](https://opensource.org/licenses/MIT):
 
 * @types/color-convert - Copyright (c) DefinitelyTyped.
-* @types/marked - Copyright (c) DefinitelyTyped.
-* @types/turndown - Copyright (c) DefinitelyTyped.
 * blurhash - Copyright (c) Wolt Enterprises.
 * color-convert - Copyright (c) 2011–2016 Heather Arthur <fayearthur@gmail.com>, copyright (c) 2016–2021 Josh Junon <josh@junon.me>.
 * color-parse - Copyright (c) 2015 Dmitry Ivanov.
 * emojibase-data - Copyright (c) 2017-2019 Miles Johnson.
 * es-toolkit - Copyright (c) 2024 Viva Republica, Inc.
 * fuzzysort - Copyright (c) 2018 Stephen Kamenar.
 * is-emoji-supported - Copyright (c) 2016-2020 Koala Interactive, Inc.
-* marked - Copyright (c) 2018+, MarkedJS (https://github.com/markedjs/), Copyright (c) 2011–2018, Christopher Jeffrey (https://github.com/chjj/).
-* turndown - Copyright (c) 2017 Dom Christie.
-* turndown-plugin-gfm - Copyright (c) 2017 Dom Christie.
 * vanilla-colorful - Copyright (c) 2020 Serhii Kulykov <iamkulykov@gmail.com>.
 * Regular Expression for URL validation - Copyright (c) 2010-2018 Diego Perini.
+* @types/hast - Copyright (c) Microsoft Corporation.
+* hast-util-to-html - Copyright (c) Titus Wormer <tituswormer@gmail.com>.
+* hast-util-to-mdast - Copyright (c) Titus Wormer <tituswormer@gmail.com> and Copyright (c) Seth Vincent <sethvincent@gmail.com>.
+* hastscript - Copyright (c) Titus Wormer <tituswormer@gmail.com>.
+* rehype-remark - Copyright (c) Titus Wormer <tituswormer@gmail.com>.
+* remark-breaks - Copyright (c) 2017 Titus Wormer <tituswormer@gmail.com>.
+* remark-gfm - Copyright (c) Titus Wormer <tituswormer@gmail.com>.
+* remark-parse - Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>.
+* remark-rehype - Copyright (c) Titus Wormer <tituswormer@gmail.com>.
+* remark-stringify - Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>.
+* unified - Copyright (c) 2015 Titus Wormer <tituswormer@gmail.com>.
+* unist-util-visit - Copyright (c) 2015 Titus Wormer <tituswormer@gmail.com>.
+
+The following libraries are included in CKEditor under the [ISC license](https://opensource.org/license/isc-license-txt):
+
+* hast-util-from-dom - Copyright (c) Keith McKnight <keith@mcknig.ht>.
+* rehype-dom-parse - Copyright (c) 2018 Keith McKnight <keith@mcknig.ht>.
+* rehype-dom-stringify - Copyright (c) 2018 Keith McKnight <keith@mcknig.ht>.
 
 Trademarks
 ----------
 
@@ -18,11 +18,24 @@ Where not otherwise indicated, all CKEditor content is authored by CKSource engi
 
 The following libraries are included in CKEditor under the [MIT license](https://opensource.org/licenses/MIT):
 
-* @types/marked - Copyright (c) DefinitelyTyped.
-* @types/turndown - Copyright (c) DefinitelyTyped.
-* marked - Copyright (c) 2018+, MarkedJS (https://github.com/markedjs/), Copyright (c) 2011–2018, Christopher Jeffrey (https://github.com/chjj/).
-* turndown - Copyright (c) 2017 Dom Christie.
-* turndown-plugin-gfm - Copyright (c) 2017 Dom Christie.
+* @types/hast - Copyright (c) Microsoft Corporation.
+* hast-util-to-html - Copyright (c) Titus Wormer <tituswormer@gmail.com>.
+* hast-util-to-mdast - Copyright (c) Titus Wormer <tituswormer@gmail.com> and Copyright (c) Seth Vincent <sethvincent@gmail.com>.
+* hastscript - Copyright (c) Titus Wormer <tituswormer@gmail.com>.
+* rehype-remark - Copyright (c) Titus Wormer <tituswormer@gmail.com>.
+* remark-breaks - Copyright (c) 2017 Titus Wormer <tituswormer@gmail.com>.
+* remark-gfm - Copyright (c) Titus Wormer <tituswormer@gmail.com>.
+* remark-parse - Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>.
+* remark-rehype - Copyright (c) Titus Wormer <tituswormer@gmail.com>.
+* remark-stringify - Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>.
+* unified - Copyright (c) 2015 Titus Wormer <tituswormer@gmail.com>.
+* unist-util-visit - Copyright (c) 2015 Titus Wormer <tituswormer@gmail.com>.
+
+The following libraries are included in CKEditor under the [ISC license](https://opensource.org/license/isc-license-txt):
+
+* hast-util-from-dom - Copyright (c) Keith McKnight <keith@mcknig.ht>.
+* rehype-dom-parse - Copyright (c) 2018 Keith McKnight <keith@mcknig.ht>.
+* rehype-dom-stringify - Copyright (c) 2018 Keith McKnight <keith@mcknig.ht>.
 
 Trademarks
 ----------
 
@@ -93,6 +93,7 @@ While the Markdown plugin is stable and ready to use, some issues are still bein
 
 * Pasting Markdown-formatted content does not automatically convert the pasted syntax markers into properly formatted content. GitHub issues: [#2321](https://github.com/ckeditor/ckeditor5/issues/2321), [#2322](https://github.com/ckeditor/ckeditor5/issues/2322).
 * The Markdown code generated with the Markdown output feature will not properly render {@link features/tables#nesting-tables nested tables}. GitHub issue: [#9475](https://github.com/ckeditor/ckeditor5/issues/9475).
+* Code blocks inside to-do lists will not render properly. GitHub issue: [#18754](https://github.com/ckeditor/ckeditor5/issues/18754).
 
 ## Related features
 
 
@@ -21,12 +21,22 @@
     "@ckeditor/ckeditor5-clipboard": "45.2.1",
     "@ckeditor/ckeditor5-core": "45.2.1",
     "@ckeditor/ckeditor5-engine": "45.2.1",
-    "@types/marked": "4.3.2",
-    "@types/turndown": "5.0.5",
+    "@types/hast": "3.0.4",
     "ckeditor5": "45.2.1",
-    "marked": "4.0.12",
-    "turndown": "7.2.0",
-    "turndown-plugin-gfm": "1.0.2"
+    "hast-util-from-dom": "5.0.1",
+    "hast-util-to-html": "9.0.5",
+    "hast-util-to-mdast": "10.1.2",
+    "hastscript": "9.0.1",
+    "rehype-dom-parse": "5.0.2",
+    "rehype-dom-stringify": "4.0.2",
+    "rehype-remark": "10.0.1",
+    "remark-breaks": "4.0.0",
+    "remark-gfm": "4.0.1",
+    "remark-parse": "11.0.0",
+    "remark-rehype": "11.1.2",
+    "remark-stringify": "11.0.0",
+    "unified": "11.0.5",
+    "unist-util-visit": "5.0.0"
   },
   "devDependencies": {
     "@ckeditor/ckeditor5-autoformat": "45.2.1",
@@ -73,10 +83,7 @@
     "CHANGELOG.md"
   ],
   "depcheckIgnore": [
-    "collapse-whitespace",
-    "jsdom",
-    "void-elements",
-    "block-elements"
+    "hast"
   ],
   "scripts": {
     "dll:build": "webpack",
 
@@ -57,7 +57,7 @@ export class MarkdownGfmDataProcessor implements DataProcessor {
 	 * @param element The element name to be kept.
 	 */
 	public keepHtml( element: keyof HTMLElementTagNameMap ): void {
-		this._html2markdown.keep( [ element ] );
+		this._html2markdown.keep( element );
 	}
 
 	/**
 
@@ -7,166 +7,104 @@
  * @module markdown-gfm/html2markdown/html2markdown
  */
 
-import Turndown from 'turndown';
+import { unified, type Plugin } from 'unified';
+import rehypeParse from 'rehype-dom-parse';
+import rehypeRemark from 'rehype-remark';
+import remarkBreaks from 'remark-breaks';
+import remarkGfm from 'remark-gfm';
+import remarkStringify from 'remark-stringify';
+import { visit } from 'unist-util-visit';
+import { h } from 'hastscript';
+import { toHtml } from 'hast-util-to-html';
+import type { Handle, State } from 'hast-util-to-mdast';
+import type { Element, Node, Root } from 'hast';
 
-// There no avaialble types for 'turndown-plugin-gfm' module and it's not worth to generate them on our own.
-/* eslint-disable @typescript-eslint/ban-ts-comment */
-// @ts-ignore
-import { gfm } from 'turndown-plugin-gfm';
-
-const autolinkRegex = /* #__PURE__ */ new RegExp(
-	// Prefix.
-	/\b(?:(?:https?|ftp):\/\/|www\.)/.source +
-
-	// Domain name.
-	/(?![-_])(?:[-_a-z0-9\u00a1-\uffff]{1,63}\.)+(?:[a-z\u00a1-\uffff]{2,63})/.source +
-
-	// The rest.
-	/(?:[^\s<>]*)/.source,
-	'gi'
-);
-
-class UpdatedTurndown extends Turndown {
-	public override escape( string: string ): string {
-		const originalEscape = super.escape;
-
-		function escape( string: string ): string {
-			string = originalEscape( string );
-
-			// Escape "<".
-			string = string.replace( /</g, '\\<' );
-
-			return string;
-		}
-
-		// Urls should not be escaped. Our strategy is using a regex to find them and escape everything
-		// which is out of the matches parts.
-
-		let escaped = '';
-		let lastLinkEnd = 0;
-
-		for ( const match of this._matchAutolink( string ) ) {
-			const index = match.index!;
-
-			// Append the substring between the last match and the current one (if anything).
-			if ( index > lastLinkEnd ) {
-				escaped += escape( string.substring( lastLinkEnd, index ) );
-			}
-
-			const matchedURL = match[ 0 ];
-
-			escaped += matchedURL;
-
-			lastLinkEnd = index + matchedURL.length;
-		}
-
-		// Add text after the last link or at the string start if no matches.
-		if ( lastLinkEnd < string.length ) {
-			escaped += escape( string.substring( lastLinkEnd, string.length ) );
-		}
+/**
+ * This is a helper class used by the {@link module:markdown-gfm/markdown Markdown feature} to convert HTML to Markdown.
+ */
+export class MarkdownGfmHtmlToMd {
+	/**
+	 * The `unified` processor assembled by `_buildProcessor()` and used by `parse()`.
+	 */
+	private _processor: any;
+	/**
+	 * Lowercased names of the HTML elements registered with `keep()`.
+	 */
+	private _keepRawTags: Array<string> = [];
 
-		return escaped;
+	/**
+	 * Creates the converter and builds the initial processor (no raw HTML tags are kept yet).
+	 */
+	constructor() {
+		this._buildProcessor();
 	}
 
-	/**
-	 * Trimming end of link.
-	 * https://github.github.com/gfm/#autolinks-extension-
-	 */
-	private* _matchAutolink( string: string ) {
-		for ( const match of string.matchAll( autolinkRegex ) ) {
-			const matched = match[ 0 ];
-			const length = this._autolinkFindEnd( matched );
-
-			yield Object.assign(
-				[ matched.substring( 0, length ) ],
-				{ index: match.index }
-			);
+	/**
+	 * Registers an HTML element that should be kept as raw HTML in the Markdown output.
+	 *
+	 * @param tagName The name of the element to keep. It is stored lowercased.
+	 */
+	public keep( tagName: string ): void {
+		const normalizedTagName = tagName.toLowerCase();
+
+		// Registering the same tag again changes nothing, so skip the duplicate entry
+		// and the needless processor rebuild.
+		if ( this._keepRawTags.includes( normalizedTagName ) ) {
+			return;
+		}
+
+		this._keepRawTags.push( normalizedTagName );
+		this._buildProcessor();
+	}
 
-			// We could adjust regex.lastIndex but it's not needed because what we skipped is for sure not a valid URL.
-		}
+	/**
+	 * Converts an HTML string to Markdown.
+	 *
+	 * @param html The HTML string to convert.
+	 * @returns The Markdown string with leading and trailing whitespace trimmed.
+	 */
+	public parse( html: string ): string {
+		return this._processor
+			.processSync( html )
+			.toString()
+			.trim();
 	}
 
 	/**
-	 * Returns the new length of the link (after it would trim trailing characters).
+	 * Returns handlers for raw HTML tags that should be kept in the Markdown output.
 	 */
-	private _autolinkFindEnd( string: string ) {
-		let length = string.length;
-
-		while ( length > 0 ) {
-			const char = string[ length - 1 ];
-
-			if ( '?!.,:*_~\'"'.includes( char ) ) {
-				length--;
-			} else if ( char == ')' ) {
-				let openBrackets = 0;
-
-				for ( let i = 0; i < length; i++ ) {
-					if ( string[ i ] == '(' ) {
-						openBrackets++;
-					} else if ( string[ i ] == ')' ) {
-						openBrackets--;
-					}
-				}
-
-				// If there is fewer opening brackets then closing ones we should remove a closing bracket.
-				if ( openBrackets < 0 ) {
-					length--;
-				} else {
-					break;
-				}
-			} else {
-				break;
-			}
-		}
+	private _getRawTagsHandlers(): Record<string, Handle> {
+		return this._keepRawTags.reduce( ( handlers: Record<string, Handle>, tagName: string ) => {
+			handlers[ tagName ] = ( state: State, node: Element ): any => {
+				// Serialize a childless copy of the element to obtain only its opening and closing tags.
+				const tag = toHtml( h( node.tagName, node.properties ), {
+					allowDangerousHtml: true,
+					closeSelfClosing: true
+				} );
+
+				// The copy has no children, so the serialized string is either `<tag ...></tag>` or, for void
+				// elements, just `<tag ... />`. Split at the closing tag instead of at the first `>`, because
+				// an attribute value may contain a literal `>` (e.g. `title="a > b"`).
+				const expectedClosingTag = `</${ node.tagName }>`;
+				const closingTag = tag.endsWith( expectedClosingTag ) ? expectedClosingTag : '';
+				const openingTag = tag.slice( 0, tag.length - closingTag.length );
+
+				// Wrap the converted children in raw HTML nodes holding the original tags.
+				return [
+					{ type: 'html', value: openingTag },
+					...state.all( node ),
+					{ type: 'html', value: closingTag }
+				];
+			};
+			return handlers;
+		}, {} as Record<string, Handle> );
+	}
 
-		return length;
+	/**
+	 * (Re)creates the `unified` processor that converts an HTML string to a Markdown string and stores it in `_processor`.
+	 *
+	 * The handlers for the kept raw HTML tags are passed to `rehype-remark` when the processor is assembled,
+	 * so this method is called again by `keep()` after a tag is registered.
+	 */
+	private _buildProcessor() {
+		this._processor = unified()
+			// Parse HTML to an abstract syntax tree (AST).
+			.use( rehypeParse )
+			// Removes `<label>` element from TODO lists.
+			.use( removeLabelFromCheckboxes )
+			// Turns HTML syntax tree into Markdown syntax tree.
+			.use( rehypeRemark, {
+				// Keeps allowed HTML tags.
+				handlers: this._getRawTagsHandlers()
+			} )
+			// Adds support for GitHub Flavored Markdown (GFM).
+			.use( remarkGfm, {
+				singleTilde: true
+			} )
+			// Treats line endings inside text as hard line breaks (`break` nodes).
+			.use( remarkBreaks )
+			// Serializes Markdown syntax tree to Markdown string.
+			.use( remarkStringify, {
+				resourceLink: true,
+				emphasis: '_',
+				rule: '-',
+				handlers: {
+					// Output a hard line break as a plain newline.
+					break: () => '\n'
+				},
+				// Additionally treat `<` as a character that needs escaping in the output.
+				unsafe: [
+					{ character: '<' }
+				]
+			} );
 	}
 }
 
 /**
- * This is a helper class used by the {@link module:markdown-gfm/markdown Markdown feature} to convert HTML to Markdown.
+ * Removes `<label>` element from TODO lists, so that `<input>` and `text` are direct children of `<li>`.
  */
-export class MarkdownGfmHtmlToMd {
-	private _parser: UpdatedTurndown;
-
-	constructor() {
-		this._parser = this._createParser();
-	}
-
-	public parse( html: string ): string {
-		return this._parser.turndown( html );
-	}
-
-	public keep( elements: Turndown.Filter ): void {
-		this._parser.keep( elements );
-	}
-
-	private _createParser(): UpdatedTurndown {
-		const parser = new UpdatedTurndown( {
-			codeBlockStyle: 'fenced',
-			hr: '---',
-			headingStyle: 'atx'
-		} );
-
-		parser.use( [
-			gfm,
-			this._todoList
-		] );
-
-		return parser;
-	}
-
-	// This is a copy of the original taskListItems rule from turndown-plugin-gfm, with minor changes.
-	private _todoList( turndown: UpdatedTurndown ): void {
-		turndown.addRule( 'taskListItems', {
-			filter( node: any ) {
-				return node.type === 'checkbox' &&
-					// Changes here as CKEditor outputs a deeper structure.
-					( node.parentNode.nodeName === 'LI' || node.parentNode.parentNode.nodeName === 'LI' );
-			},
-			replacement( content: any, node: any ) {
-				return ( node.checked ? '[x]' : '[ ]' ) + ' ';
+function removeLabelFromCheckboxes(): ReturnType<Plugin> {
+	return function( tree: Node ): void {
+		visit( tree, 'element', ( node: Element, index: number | null, parent: Root | Element ) => {
+			// `unist-util-visit` v5 passes `undefined` (not `null`) as `index` and `parent` for a node that has
+			// no parent, so check for an actual number and an existing parent instead of comparing with `null`.
+			if (
+				typeof index === 'number' && parent &&
+				node.tagName === 'label' &&
+				parent.type === 'element' && parent.tagName === 'li'
+			) {
+				// Replace the `<label>` with its own children, so they become direct children of the `<li>`.
+				parent.children.splice( index, 1, ...node.children );
 			}
 		} );
-	}
+	};
 }
Original file line number	Diff line number	Diff line change
`@@ -57,7 +57,7 @@ export class MarkdownGfmDataProcessor implements DataProcessor {`
`57`	`57`	`* @param element The element name to be kept.`
`58`	`58`	`*/`
`59`	`59`	`public keepHtml( element: keyof HTMLElementTagNameMap ): void {`
`60`		`- this._html2markdown.keep( [ element ] );`
	`60`	`+ this._html2markdown.keep( element );`
`61`	`61`	`}`
`62`	`62`
`63`	`63`	`/**`