From 2f4cf7bb793cfc4f05832984b8dfb299a7b2ddf3 Mon Sep 17 00:00:00 2001 From: Paul Bearne Date: Thu, 4 Dec 2025 18:46:33 -0500 Subject: [PATCH] "Add Markdown feed support with HTML to Markdown converters" --- src/wp-includes/default-filters.php | 2 + src/wp-includes/feed-markdown.php | 71 +++++ src/wp-includes/functions.php | 25 ++ src/wp-includes/html-to-markdown/Coerce.php | 35 +++ .../html-to-markdown/Configuration.php | 80 +++++ .../ConfigurationAwareInterface.php | 10 + .../Converter/BlockquoteConverter.php | 42 +++ .../Converter/CodeConverter.php | 68 +++++ .../Converter/CommentConverter.php | 53 ++++ .../Converter/ConverterInterface.php | 17 ++ .../Converter/DefaultConverter.php | 49 ++++ .../Converter/DivConverter.php | 37 +++ .../Converter/EmphasisConverter.php | 72 +++++ .../Converter/HardBreakConverter.php | 48 +++ .../Converter/HeaderConverter.php | 62 ++++ .../Converter/HorizontalRuleConverter.php | 23 ++ .../Converter/ImageConverter.php | 32 ++ .../Converter/LinkConverter.php | 77 +++++ .../Converter/ListBlockConverter.php | 23 ++ .../Converter/ListItemConverter.php | 71 +++++ .../Converter/ParagraphConverter.php | 108 +++++++ .../Converter/PreformattedConverter.php | 58 ++++ .../Converter/TableConverter.php | 114 +++++++ .../Converter/TextConverter.php | 48 +++ src/wp-includes/html-to-markdown/Element.php | 233 +++++++++++++++ .../html-to-markdown/ElementInterface.php | 50 ++++ .../html-to-markdown/Environment.php | 92 ++++++ .../html-to-markdown/HtmlConverter.php | 277 ++++++++++++++++++ .../HtmlConverterInterface.php | 26 ++ .../PreConverterInterface.php | 10 + 30 files changed, 1913 insertions(+) create mode 100644 src/wp-includes/feed-markdown.php create mode 100644 src/wp-includes/html-to-markdown/Coerce.php create mode 100644 src/wp-includes/html-to-markdown/Configuration.php create mode 100644 src/wp-includes/html-to-markdown/ConfigurationAwareInterface.php create mode 100644 src/wp-includes/html-to-markdown/Converter/BlockquoteConverter.php 
create mode 100644 src/wp-includes/html-to-markdown/Converter/CodeConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/CommentConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/ConverterInterface.php create mode 100644 src/wp-includes/html-to-markdown/Converter/DefaultConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/DivConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/EmphasisConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/HardBreakConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/HeaderConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/HorizontalRuleConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/ImageConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/LinkConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/ListBlockConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/ListItemConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/ParagraphConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/PreformattedConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/TableConverter.php create mode 100644 src/wp-includes/html-to-markdown/Converter/TextConverter.php create mode 100644 src/wp-includes/html-to-markdown/Element.php create mode 100644 src/wp-includes/html-to-markdown/ElementInterface.php create mode 100644 src/wp-includes/html-to-markdown/Environment.php create mode 100644 src/wp-includes/html-to-markdown/HtmlConverter.php create mode 100644 src/wp-includes/html-to-markdown/HtmlConverterInterface.php create mode 100644 src/wp-includes/html-to-markdown/PreConverterInterface.php diff --git a/src/wp-includes/default-filters.php b/src/wp-includes/default-filters.php index 68dccd979f2fe..2b159c1117ab5 100644 --- 
a/src/wp-includes/default-filters.php +++ b/src/wp-includes/default-filters.php @@ -416,6 +416,8 @@ add_action( 'do_feed_rss', 'do_feed_rss', 10, 0 ); add_action( 'do_feed_rss2', 'do_feed_rss2', 10, 1 ); add_action( 'do_feed_atom', 'do_feed_atom', 10, 1 ); +add_action( 'do_feed_markdown', 'do_feed_markdown', 10, 1 ); +add_action( 'init', 'wp_register_markdown_feed' ); add_action( 'do_pings', 'do_all_pings', 10, 0 ); add_action( 'do_all_pings', 'do_all_pingbacks', 10, 0 ); add_action( 'do_all_pings', 'do_all_enclosures', 10, 0 ); diff --git a/src/wp-includes/feed-markdown.php b/src/wp-includes/feed-markdown.php new file mode 100644 index 0000000000000..5caca636a2470 --- /dev/null +++ b/src/wp-includes/feed-markdown.php @@ -0,0 +1,71 @@ + 'atx', + 'suppress_errors' => true, +) ); + +// Feed header as Markdown. +echo '# ' . wp_strip_all_tags( get_bloginfo( 'name' ) ) . ' — ' . __( 'Markdown Feed', 'default' ) . "\n\n"; +$desc = get_bloginfo( 'description' ); +if ( $desc ) { + echo wp_strip_all_tags( $desc ) . "\n\n"; +} + +echo __( 'Feed URL:', 'default' ) . ' <' . esc_url_raw( get_self_link() ) . ">\n\n"; + +if ( have_posts() ) : + while ( have_posts() ) : + the_post(); + + $title = wp_strip_all_tags( get_the_title() ); + $permalink = get_permalink(); + $date_r = get_post_time( 'r', true ); + $content = get_post_field( 'post_content', get_the_ID() ); + + // Post block in Markdown. + echo "## [" . $title . "](" + . $permalink . ")\n"; + echo '*' . __( 'Published:', 'default' ) . '* ' . $date_r . "\n\n"; + + $html = apply_filters( 'the_content', $content ); + $md = $__wp_md_converter->convert( (string) $html ); + $md = trim( $md ); + if ( $md !== '' ) { + echo $md . "\n\n"; + } + + // Separator. 
+ echo "---\n\n"; + endwhile; +else : + echo __( 'No posts found.', 'default' ); +endif; diff --git a/src/wp-includes/functions.php b/src/wp-includes/functions.php index 68a29f444fd48..dd536a6b4e75a 100644 --- a/src/wp-includes/functions.php +++ b/src/wp-includes/functions.php @@ -1697,6 +1697,31 @@ function do_feed_atom( $for_comments ) { } } +/** + * Loads the Markdown feed template. + * + * A simple feed that outputs post content converted from HTML to Markdown. + * + * @since 6.7.0 + * + * @see load_template() + * + * @param bool $for_comments Unused. Present for parity with other feed handlers. + */ +function do_feed_markdown( $for_comments ) { // phpcs:ignore VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable + + load_template( ABSPATH . WPINC . '/feed-markdown.php' ); +} + +/** + * Registers the Markdown feed rewrite and hook. + * + * @since 6.7.0 + */ +function wp_register_markdown_feed() { + add_feed( 'markdown', 'do_feed_markdown' ); +} + /** * Displays the default robots.txt file content. 
* diff --git a/src/wp-includes/html-to-markdown/Coerce.php b/src/wp-includes/html-to-markdown/Coerce.php new file mode 100644 index 0000000000000..4fb0450a06183 --- /dev/null +++ b/src/wp-includes/html-to-markdown/Coerce.php @@ -0,0 +1,35 @@ +__toString(); + default: + throw new \InvalidArgumentException('Cannot coerce this value to string'); + } + } +} diff --git a/src/wp-includes/html-to-markdown/Configuration.php b/src/wp-includes/html-to-markdown/Configuration.php new file mode 100644 index 0000000000000..7e1d71e687e71 --- /dev/null +++ b/src/wp-includes/html-to-markdown/Configuration.php @@ -0,0 +1,80 @@ + */ + protected $config; + + /** + * @param array $config + */ + public function __construct(array $config = []) + { + $this->config = $config; + + $this->checkForDeprecatedOptions($config); + } + + /** + * @param array $config + */ + public function merge(array $config = []): void + { + $this->checkForDeprecatedOptions($config); + $this->config = \array_replace_recursive($this->config, $config); + } + + /** + * @param array $config + */ + public function replace(array $config = []): void + { + $this->checkForDeprecatedOptions($config); + $this->config = $config; + } + + /** + * @param mixed $value + */ + public function setOption(string $key, $value): void + { + $this->checkForDeprecatedOptions([$key => $value]); + $this->config[$key] = $value; + } + + /** + * @param mixed|null $default + * + * @return mixed|null + */ + public function getOption(?string $key = null, $default = null) + { + if ($key === null) { + return $this->config; + } + + if (! 
isset($this->config[$key])) { + return $default; + } + + return $this->config[$key]; + } + + /** + * @param array $config + */ + private function checkForDeprecatedOptions(array $config): void + { + foreach ($config as $key => $value) { + if ($key === 'bold_style' && $value !== '**') { + @\trigger_error('Customizing the bold_style option is deprecated and may be removed in the next major version', E_USER_DEPRECATED); + } elseif ($key === 'italic_style' && $value !== '*') { + @\trigger_error('Customizing the italic_style option is deprecated and may be removed in the next major version', E_USER_DEPRECATED); + } + } + } +} diff --git a/src/wp-includes/html-to-markdown/ConfigurationAwareInterface.php b/src/wp-includes/html-to-markdown/ConfigurationAwareInterface.php new file mode 100644 index 0000000000000..50c004c4a86aa --- /dev/null +++ b/src/wp-includes/html-to-markdown/ConfigurationAwareInterface.php @@ -0,0 +1,10 @@ +' symbols to each line. + + $markdown = ''; + + $quoteContent = \trim($element->getValue()); + + $lines = \preg_split('/\r\n|\r|\n/', $quoteContent); + \assert(\is_array($lines)); + + $totalLines = \count($lines); + + foreach ($lines as $i => $line) { + $markdown .= '> ' . $line . 
"\n"; + if ($i + 1 === $totalLines) { + $markdown .= "\n"; + } + } + + return $markdown; + } + + /** + * @return string[] + */ + public function getSupportedTags(): array + { + return ['blockquote']; + } +} diff --git a/src/wp-includes/html-to-markdown/Converter/CodeConverter.php b/src/wp-includes/html-to-markdown/Converter/CodeConverter.php new file mode 100644 index 0000000000000..40eb7f85a18de --- /dev/null +++ b/src/wp-includes/html-to-markdown/Converter/CodeConverter.php @@ -0,0 +1,68 @@ +getAttribute('class'); + + if ($classes) { + // Since tags can have more than one class, we need to find the one that starts with 'language-' + $classes = \explode(' ', $classes); + foreach ($classes as $class) { + if (\strpos($class, 'language-') !== false) { + // Found one, save it as the selected language and stop looping over the classes. + $language = \str_replace('language-', '', $class); + break; + } + } + } + + $markdown = ''; + $code = \html_entity_decode($element->getChildrenAsString()); + + // In order to remove the code tags we need to search for them and, in the case of the opening tag + // use a regular expression to find the tag and the other attributes it might have + $code = \preg_replace('/]*>/', '', $code); + \assert($code !== null); + $code = \str_replace('', '', $code); + + // Checking if it's a code block or span + if ($this->shouldBeBlock($element, $code)) { + // Code block detected, newlines will be added in parent + $markdown .= '```' . $language . "\n" . $code . "\n" . '```'; + } else { + // One line of code, wrapping it on one backtick, removing new lines + $markdown .= '`' . \preg_replace('/\r\n|\r|\n/', '', $code) . 
'`'; + } + + return $markdown; + } + + /** + * @return string[] + */ + public function getSupportedTags(): array + { + return ['code']; + } + + private function shouldBeBlock(ElementInterface $element, string $code): bool + { + $parent = $element->getParent(); + if ($parent !== null && $parent->getTagName() === 'pre') { + return true; + } + + return \preg_match('/[^\s]` `/', $code) === 1; + } +} diff --git a/src/wp-includes/html-to-markdown/Converter/CommentConverter.php b/src/wp-includes/html-to-markdown/Converter/CommentConverter.php new file mode 100644 index 0000000000000..c69dea551d82c --- /dev/null +++ b/src/wp-includes/html-to-markdown/Converter/CommentConverter.php @@ -0,0 +1,53 @@ +config = $config; + } + + public function convert(ElementInterface $element): string + { + if ($this->shouldPreserve($element)) { + return ''; + } + + return ''; + } + + /** + * @return string[] + */ + public function getSupportedTags(): array + { + return ['#comment']; + } + + private function shouldPreserve(ElementInterface $element): bool + { + $preserve = $this->config->getOption('preserve_comments'); + if ($preserve === true) { + return true; + } + + if (\is_array($preserve)) { + $value = \trim($element->getValue()); + + return \in_array($value, $preserve, true); + } + + return false; + } +} diff --git a/src/wp-includes/html-to-markdown/Converter/ConverterInterface.php b/src/wp-includes/html-to-markdown/Converter/ConverterInterface.php new file mode 100644 index 0000000000000..f10498578e010 --- /dev/null +++ b/src/wp-includes/html-to-markdown/Converter/ConverterInterface.php @@ -0,0 +1,17 @@ +config = $config; + } + + public function convert(ElementInterface $element): string + { + // If strip_tags is false (the default), preserve tags that don't have Markdown equivalents, + // such as nodes on their own. C14N() canonicalizes the node to a string. 
+ // See: http://www.php.net/manual/en/domnode.c14n.php + if ($this->config->getOption('strip_tags', false)) { + return $element->getValue(); + } + + $markdown = \html_entity_decode($element->getChildrenAsString()); + + // Tables are only handled here if TableConverter is not used + if ($element->getTagName() === 'table') { + $markdown .= "\n\n"; + } + + return $markdown; + } + + /** + * @return string[] + */ + public function getSupportedTags(): array + { + return [self::DEFAULT_CONVERTER]; + } +} diff --git a/src/wp-includes/html-to-markdown/Converter/DivConverter.php b/src/wp-includes/html-to-markdown/Converter/DivConverter.php new file mode 100644 index 0000000000000..6453a2a27c70e --- /dev/null +++ b/src/wp-includes/html-to-markdown/Converter/DivConverter.php @@ -0,0 +1,37 @@ +config = $config; + } + + public function convert(ElementInterface $element): string + { + if ($this->config->getOption('strip_tags', false)) { + return $element->getValue() . "\n\n"; + } + + return \html_entity_decode($element->getChildrenAsString()); + } + + /** + * @return string[] + */ + public function getSupportedTags(): array + { + return ['div']; + } +} diff --git a/src/wp-includes/html-to-markdown/Converter/EmphasisConverter.php b/src/wp-includes/html-to-markdown/Converter/EmphasisConverter.php new file mode 100644 index 0000000000000..a122f40527dac --- /dev/null +++ b/src/wp-includes/html-to-markdown/Converter/EmphasisConverter.php @@ -0,0 +1,72 @@ +isText()) { + $tag = $element->getTagName(); + if ($tag === 'i' || $tag === 'em') { + return 'em'; + } + + if ($tag === 'b' || $tag === 'strong') { + return 'strong'; + } + } + + return ''; + } + + public function setConfig(Configuration $config): void + { + $this->config = $config; + } + + public function convert(ElementInterface $element): string + { + $tag = $this->getNormTag($element); + $value = $element->getValue(); + + if (! 
\trim($value)) { + return $value; + } + + if ($tag === 'em') { + $style = $this->config->getOption('italic_style'); + } else { + $style = $this->config->getOption('bold_style'); + } + + $prefix = \ltrim($value) !== $value ? ' ' : ''; + $suffix = \rtrim($value) !== $value ? ' ' : ''; + + /* If this node is immediately preceded or followed by one of the same type don't emit + * the start or end $style, respectively. This prevents <em>foo</em><em>bar</em> from + * being converted to *foo**bar* which is incorrect. We want *foobar* instead. + */ + $preStyle = $this->getNormTag($element->getPreviousSibling()) === $tag ? '' : $style; + $postStyle = $this->getNormTag($element->getNextSibling()) === $tag ? '' : $style; + + return $prefix . $preStyle . \trim($value) . $postStyle . $suffix; + } + + /** + * @return string[] + */ + public function getSupportedTags(): array + { + return ['em', 'i', 'strong', 'b']; + } +} diff --git a/src/wp-includes/html-to-markdown/Converter/HardBreakConverter.php b/src/wp-includes/html-to-markdown/Converter/HardBreakConverter.php new file mode 100644 index 0000000000000..45e89682eab1f --- /dev/null +++ b/src/wp-includes/html-to-markdown/Converter/HardBreakConverter.php @@ -0,0 +1,48 @@ +config = $config; + } + + public function convert(ElementInterface $element): string + { + $return = $this->config->getOption('hard_break') ? 
"\n" : " \n"; + + $next = $element->getNext(); + if ($next) { + $nextValue = $next->getValue(); + if ($nextValue) { + if (\in_array(\substr($nextValue, 0, 2), ['- ', '* ', '+ '], true)) { + $parent = $element->getParent(); + if ($parent && $parent->getTagName() === 'li') { + $return .= '\\'; + } + } + } + } + + return $return; + } + + /** + * @return string[] + */ + public function getSupportedTags(): array + { + return ['br']; + } +} diff --git a/src/wp-includes/html-to-markdown/Converter/HeaderConverter.php b/src/wp-includes/html-to-markdown/Converter/HeaderConverter.php new file mode 100644 index 0000000000000..e99dfa0f435c6 --- /dev/null +++ b/src/wp-includes/html-to-markdown/Converter/HeaderConverter.php @@ -0,0 +1,62 @@ +config = $config; + } + + public function convert(ElementInterface $element): string + { + $level = (int) \substr($element->getTagName(), 1, 1); + $style = $this->config->getOption('header_style', self::STYLE_SETEXT); + + if (\strlen($element->getValue()) === 0) { + return "\n"; + } + + if (($level === 1 || $level === 2) && ! $element->isDescendantOf('blockquote') && $style === self::STYLE_SETEXT) { + return $this->createSetextHeader($level, $element->getValue()); + } + + return $this->createAtxHeader($level, $element->getValue()); + } + + /** + * @return string[] + */ + public function getSupportedTags(): array + { + return ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']; + } + + private function createSetextHeader(int $level, string $content): string + { + $length = \function_exists('mb_strlen') ? \mb_strlen($content, 'utf-8') : \strlen($content); + $underline = $level === 1 ? '=' : '-'; + + return $content . "\n" . \str_repeat($underline, $length) . "\n\n"; + } + + private function createAtxHeader(int $level, string $content): string + { + $prefix = \str_repeat('#', $level) . ' '; + + return $prefix . $content . 
"\n\n"; + } +} diff --git a/src/wp-includes/html-to-markdown/Converter/HorizontalRuleConverter.php b/src/wp-includes/html-to-markdown/Converter/HorizontalRuleConverter.php new file mode 100644 index 0000000000000..a2b1ac14a85f8 --- /dev/null +++ b/src/wp-includes/html-to-markdown/Converter/HorizontalRuleConverter.php @@ -0,0 +1,23 @@ +getAttribute('src'); + $alt = $element->getAttribute('alt'); + $title = $element->getAttribute('title'); + + if ($title !== '') { + // No newlines added. should be in a block-level element. + return '![' . $alt . '](' . $src . ' "' . $title . '")'; + } + + return '![' . $alt . '](' . $src . ')'; + } + + /** + * @return string[] + */ + public function getSupportedTags(): array + { + return ['img']; + } +} diff --git a/src/wp-includes/html-to-markdown/Converter/LinkConverter.php b/src/wp-includes/html-to-markdown/Converter/LinkConverter.php new file mode 100644 index 0000000000000..f0ba157f89364 --- /dev/null +++ b/src/wp-includes/html-to-markdown/Converter/LinkConverter.php @@ -0,0 +1,77 @@ +config = $config; + } + + public function convert(ElementInterface $element): string + { + $href = $element->getAttribute('href'); + $title = $element->getAttribute('title'); + $text = \trim($element->getValue(), "\t\n\r\0\x0B"); + + if ($title !== '') { + $markdown = '[' . $text . '](' . $href . ' "' . $title . '")'; + } elseif ($href === $text && $this->isValidAutolink($href)) { + $markdown = '<' . $href . '>'; + } elseif ($href === 'mailto:' . $text && $this->isValidEmail($text)) { + $markdown = '<' . $text . '>'; + } else { + if (\stristr($href, ' ')) { + $href = '<' . $href . '>'; + } + + $markdown = '[' . $text . '](' . $href . ')'; + } + + if (! 
$href) { + if ($this->shouldStrip()) { + $markdown = $text; + } else { + $markdown = \html_entity_decode($element->getChildrenAsString()); + } + } + + return $markdown; + } + + /** + * @return string[] + */ + public function getSupportedTags(): array + { + return ['a']; + } + + private function isValidAutolink(string $href): bool + { + $useAutolinks = $this->config->getOption('use_autolinks'); + + return $useAutolinks && (\preg_match('/^[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*/i', $href) === 1); + } + + private function isValidEmail(string $email): bool + { + // Email validation is messy business, but this should cover most cases + return \filter_var($email, FILTER_VALIDATE_EMAIL) !== false; + } + + private function shouldStrip(): bool + { + return \boolval($this->config->getOption('strip_placeholder_links') ?? false); + } +} diff --git a/src/wp-includes/html-to-markdown/Converter/ListBlockConverter.php b/src/wp-includes/html-to-markdown/Converter/ListBlockConverter.php new file mode 100644 index 0000000000000..ce7b94654d65b --- /dev/null +++ b/src/wp-includes/html-to-markdown/Converter/ListBlockConverter.php @@ -0,0 +1,23 @@ +getValue() . "\n"; + } + + /** + * @return string[] + */ + public function getSupportedTags(): array + { + return ['ol', 'ul']; + } +} diff --git a/src/wp-includes/html-to-markdown/Converter/ListItemConverter.php b/src/wp-includes/html-to-markdown/Converter/ListItemConverter.php new file mode 100644 index 0000000000000..d71f601e157e0 --- /dev/null +++ b/src/wp-includes/html-to-markdown/Converter/ListItemConverter.php @@ -0,0 +1,71 @@ +config = $config; + } + + public function convert(ElementInterface $element): string + { + // If parent is an ol, use numbers, otherwise, use dashes + $listType = ($parent = $element->getParent()) ? $parent->getTagName() : 'ul'; + + // Add spaces to start for nested list items + $level = $element->getListItemLevel(); + + $value = \trim(\implode("\n" . 
' ', \explode("\n", \trim($element->getValue())))); + + // If list item is the first in a nested list, add a newline before it + $prefix = ''; + if ($level > 0 && $element->getSiblingPosition() === 1) { + $prefix = "\n"; + } + + if ($listType === 'ul') { + $listItemStyle = Coerce::toString($this->config->getOption('list_item_style', '-')); + $listItemStyleAlternate = Coerce::toString($this->config->getOption('list_item_style_alternate', '')); + if (! isset($this->listItemStyle)) { + $this->listItemStyle = $listItemStyleAlternate ?: $listItemStyle; + } + + if ($listItemStyleAlternate && $level === 0 && $element->getSiblingPosition() === 1) { + $this->listItemStyle = $this->listItemStyle === $listItemStyle ? $listItemStyleAlternate : $listItemStyle; + } + + return $prefix . $this->listItemStyle . ' ' . $value . "\n"; + } + + if ($listType === 'ol' && ($parent = $element->getParent()) && ($start = \intval($parent->getAttribute('start')))) { + $number = $start + $element->getSiblingPosition() - 1; + } else { + $number = $element->getSiblingPosition(); + } + + return $prefix . $number . '. ' . $value . "\n"; + } + + /** + * @return string[] + */ + public function getSupportedTags(): array + { + return ['li']; + } +} diff --git a/src/wp-includes/html-to-markdown/Converter/ParagraphConverter.php b/src/wp-includes/html-to-markdown/Converter/ParagraphConverter.php new file mode 100644 index 0000000000000..65b37a4dba8c1 --- /dev/null +++ b/src/wp-includes/html-to-markdown/Converter/ParagraphConverter.php @@ -0,0 +1,108 @@ +getValue(); + + $markdown = ''; + + $lines = \preg_split('/\r\n|\r|\n/', $value); + \assert($lines !== false); + + foreach ($lines as $line) { + /* + * Some special characters need to be escaped based on the position that they appear + * The following function will deal with those special cases. + */ + $markdown .= $this->escapeSpecialCharacters($line); + $markdown .= "\n"; + } + + return \trim($markdown) !== '' ? \rtrim($markdown) . 
"\n\n" : ''; + } + + /** + * @return string[] + */ + public function getSupportedTags(): array + { + return ['p']; + } + + private function escapeSpecialCharacters(string $line): string + { + $line = $this->escapeFirstCharacters($line); + $line = $this->escapeOtherCharacters($line); + $line = $this->escapeOtherCharactersRegex($line); + + return $line; + } + + private function escapeFirstCharacters(string $line): string + { + $escapable = [ + '>', + '- ', + '+ ', + '--', + '~~~', + '---', + '- - -', + ]; + + foreach ($escapable as $i) { + if (\strpos(\ltrim($line), $i) === 0) { + // Found a character that must be escaped, adding a backslash before + return '\\' . \ltrim($line); + } + } + + return $line; + } + + private function escapeOtherCharacters(string $line): string + { + $escapable = [ + '