Skip to content

Commit 2f4cf7b

Browse files
author
Paul Bearne
committed
"Add Markdown feed support with HTML to Markdown converters"
1 parent 0b50baa commit 2f4cf7b

30 files changed

+1913
-0
lines changed

src/wp-includes/default-filters.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,8 @@
416416
add_action( 'do_feed_rss', 'do_feed_rss', 10, 0 );
417417
add_action( 'do_feed_rss2', 'do_feed_rss2', 10, 1 );
418418
add_action( 'do_feed_atom', 'do_feed_atom', 10, 1 );
419+
add_action( 'do_feed_markdown', 'do_feed_markdown', 10, 1 );
420+
add_action( 'init', 'wp_register_markdown_feed' );
419421
add_action( 'do_pings', 'do_all_pings', 10, 0 );
420422
add_action( 'do_all_pings', 'do_all_pingbacks', 10, 0 );
421423
add_action( 'do_all_pings', 'do_all_enclosures', 10, 0 );

src/wp-includes/feed-markdown.php

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
<?php
/**
 * Markdown Feed Template for displaying posts as Markdown text.
 *
 * Accessible via /?feed=markdown or /feed/markdown/ depending on permalink settings.
 *
 * @package WordPress
 * @since 6.7.0
 */

// Advertise the payload as Markdown (RFC 7763) so clients do not try to render it as HTML.
header( 'Content-Type: text/markdown; charset=' . get_option( 'blog_charset' ), true );

// Ensure full content is used.
// phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited -- Used intentionally to ensure full content in feed.
$more = 1;

// Register a simple autoloader for the bundled League HTMLToMarkdown library.
spl_autoload_register(
	static function ( $class ) {
		$prefix = 'League\\HTMLToMarkdown\\';

		// Leave every class outside the library namespace to other autoloaders.
		if ( 0 !== strpos( $class, $prefix ) ) {
			return;
		}

		// Map the namespace tail onto a path below wp-includes/html-to-markdown/.
		$relative = str_replace( '\\', DIRECTORY_SEPARATOR, substr( $class, strlen( $prefix ) ) );
		$file     = ABSPATH . WPINC . '/html-to-markdown/' . $relative . '.php';

		if ( file_exists( $file ) ) {
			require_once $file;
		}
	}
);

// Create a converter instance; tune options if desired.
$__wp_md_converter = new \League\HTMLToMarkdown\HtmlConverter(
	array(
		'header_style'    => 'atx',
		'suppress_errors' => true,
	)
);

// Feed header as Markdown: "# <site name> - Markdown Feed".
echo '# ' . wp_strip_all_tags( get_bloginfo( 'name' ) ) . ' - ' . __( 'Markdown Feed', 'default' ) . "\n\n";
$desc = get_bloginfo( 'description' );
if ( $desc ) {
	echo wp_strip_all_tags( $desc ) . "\n\n";
}

echo __( 'Feed URL:', 'default' ) . ' <' . esc_url_raw( get_self_link() ) . ">\n\n";

if ( have_posts() ) :
	while ( have_posts() ) :
		the_post();

		// Escape square brackets so a title containing them cannot break the link label.
		$title     = addcslashes( wp_strip_all_tags( get_the_title() ), '[]' );
		$permalink = get_permalink();
		$date_r    = get_post_time( 'r', true );

		// Post block in Markdown.
		echo '## [' . $title . '](' . $permalink . ")\n";
		echo '*' . __( 'Published:', 'default' ) . '* ' . $date_r . "\n\n";

		// The raw post_content field is read directly below, so password-protected
		// posts are guarded explicitly and only their heading is emitted.
		$md = '';
		if ( ! post_password_required() ) {
			$content = get_post_field( 'post_content', get_the_ID() );
			$html    = apply_filters( 'the_content', $content );
			$md      = trim( $__wp_md_converter->convert( (string) $html ) );
		}

		if ( '' !== $md ) {
			echo $md . "\n\n";
		}

		// Separator.
		echo "---\n\n";
	endwhile;
else :
	echo __( 'No posts found.', 'default' );
endif;

src/wp-includes/functions.php

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1697,6 +1697,31 @@ function do_feed_atom( $for_comments ) {
16971697
}
16981698
}
16991699

1700+
/**
 * Loads the Markdown feed template.
 *
 * Passes the path of wp-includes/feed-markdown.php to load_template().
 *
 * @since 6.7.0
 *
 * @see load_template()
 *
 * @param bool $for_comments Unused. Present for parity with other feed handlers.
 */
function do_feed_markdown( $for_comments ) { // phpcs:ignore VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable
	$template = ABSPATH . WPINC . '/feed-markdown.php';

	load_template( $template );
}
1715+
1716+
/**
 * Registers the Markdown feed.
 *
 * Calls add_feed() with the 'markdown' feed name and do_feed_markdown() as its handler.
 *
 * @since 6.7.0
 */
function wp_register_markdown_feed() {
	$feed_name = 'markdown';
	$handler   = 'do_feed_markdown';

	add_feed( $feed_name, $handler );
}
1724+
17001725
/**
17011726
* Displays the default robots.txt file content.
17021727
*
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace League\HTMLToMarkdown;
6+
7+
/**
 * Static-only helper that turns loosely typed values into strings.
 *
 * @internal
 */
final class Coerce
{
    /**
     * Private so the class cannot be instantiated; only the static API is used.
     */
    private function __construct()
    {
    }

    /**
     * Converts a value to its string form.
     *
     * Strings are returned untouched; booleans, floats, integers and null go
     * through strval(); objects are accepted only when they define __toString().
     *
     * @param mixed $val
     *
     * @throws \InvalidArgumentException When the value is none of the above (e.g. an array).
     */
    public static function toString($val): string
    {
        if (\is_string($val)) {
            return $val;
        }

        $isPlainScalarOrNull = \is_bool($val) || \is_float($val) || \is_int($val) || $val === null;
        if ($isPlainScalarOrNull) {
            return \strval($val);
        }

        if (\is_object($val) && \method_exists($val, '__toString')) {
            return $val->__toString();
        }

        throw new \InvalidArgumentException('Cannot coerce this value to string');
    }
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace League\HTMLToMarkdown;
6+
7+
/**
 * Simple key/value option bag.
 *
 * Every method that accepts options first runs them through the deprecation
 * check for customised 'bold_style' / 'italic_style' values.
 */
class Configuration
{
    /** @var array<string, mixed> */
    protected $config;

    /**
     * Stores the initial options, then reports any deprecated ones.
     *
     * @param array<string, mixed> $config
     */
    public function __construct(array $config = [])
    {
        $this->config = $config;

        $this->checkForDeprecatedOptions($config);
    }

    /**
     * Recursively overlays the given options on top of the stored ones.
     *
     * @param array<string, mixed> $config
     */
    public function merge(array $config = []): void
    {
        $this->checkForDeprecatedOptions($config);

        $merged       = \array_replace_recursive($this->config, $config);
        $this->config = $merged;
    }

    /**
     * Discards the stored options and keeps only the given ones.
     *
     * @param array<string, mixed> $config
     */
    public function replace(array $config = []): void
    {
        $this->checkForDeprecatedOptions($config);

        $this->config = $config;
    }

    /**
     * Sets a single option.
     *
     * @param mixed $value
     */
    public function setOption(string $key, $value): void
    {
        $single = [$key => $value];
        $this->checkForDeprecatedOptions($single);

        $this->config[$key] = $value;
    }

    /**
     * Reads one option, or the whole option array when no key is given.
     *
     * An option that is missing, or stored as null, yields $default.
     *
     * @param mixed|null $default
     *
     * @return mixed|null
     */
    public function getOption(?string $key = null, $default = null)
    {
        if ($key === null) {
            return $this->config;
        }

        // `??` has the same "unset or null" semantics as the isset() check it replaces.
        return $this->config[$key] ?? $default;
    }

    /**
     * Raises a silenced E_USER_DEPRECATED notice for each customised style option.
     *
     * @param array<string, mixed> $config
     */
    private function checkForDeprecatedOptions(array $config): void
    {
        // Option name => the only value that does not count as a customisation.
        $stockValues = [
            'bold_style'   => '**',
            'italic_style' => '*',
        ];

        $notices = [
            'bold_style'   => 'Customizing the bold_style option is deprecated and may be removed in the next major version',
            'italic_style' => 'Customizing the italic_style option is deprecated and may be removed in the next major version',
        ];

        foreach ($config as $name => $setting) {
            if (! isset($stockValues[$name]) || $setting === $stockValues[$name]) {
                continue;
            }

            @\trigger_error($notices[$name], E_USER_DEPRECATED);
        }
    }
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace League\HTMLToMarkdown;
6+
7+
/**
 * Contract for objects that can be handed a Configuration instance.
 */
interface ConfigurationAwareInterface
{
    /**
     * Accepts a Configuration instance.
     */
    public function setConfig(Configuration $config): void;
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace League\HTMLToMarkdown\Converter;
6+
7+
use League\HTMLToMarkdown\ElementInterface;
8+
9+
/**
 * Converts <blockquote> elements to Markdown quote syntax.
 */
class BlockquoteConverter implements ConverterInterface
{
    /**
     * Prefixes every line of the element's value with "> ".
     *
     * The value is trimmed, split on any newline style, and the quoted lines
     * are followed by one blank line.
     */
    public function convert(ElementInterface $element): string
    {
        // The element's value is expected to be Markdown already, so only the
        // quote markers need to be added here.
        $quotedLines = \preg_split('/\r\n|\r|\n/', \trim($element->getValue()));
        \assert(\is_array($quotedLines));

        return '> ' . \implode("\n> ", $quotedLines) . "\n\n";
    }

    /**
     * @return string[]
     */
    public function getSupportedTags(): array
    {
        $tags = ['blockquote'];

        return $tags;
    }
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace League\HTMLToMarkdown\Converter;
6+
7+
use League\HTMLToMarkdown\ElementInterface;
8+
9+
/**
 * Converts <code> elements to Markdown code spans or fenced code blocks.
 */
class CodeConverter implements ConverterInterface
{
    /**
     * Renders a <code> element as Markdown.
     *
     * Produces a fenced block (optionally tagged with a language taken from a
     * `language-*` class) when shouldBeBlock() says so, otherwise a
     * single-backtick span with newlines removed.
     */
    public function convert(ElementInterface $element): string
    {
        $language = '';

        // Checking for language class on the code block
        $classes = $element->getAttribute('class');

        if ($classes) {
            // Since tags can have more than one class, we need to find the one that starts with 'language-'
            $prefix = 'language-';
            foreach (\explode(' ', $classes) as $class) {
                // Require a true prefix match; a substring match would accept classes such as
                // "no-language-x" and str_replace() would strip every occurrence of the prefix.
                if (\strpos($class, $prefix) === 0) {
                    // Found one, save it as the selected language and stop looping over the classes.
                    $language = \substr($class, \strlen($prefix));
                    break;
                }
            }
        }

        $markdown = '';
        $code     = \html_entity_decode($element->getChildrenAsString());

        // In order to remove the code tags we need to search for them and, in the case of the opening tag
        // use a regular expression to find the tag and the other attributes it might have
        $code = \preg_replace('/<code\b[^>]*>/', '', $code);
        \assert($code !== null);
        $code = \str_replace('</code>', '', $code);

        // Checking if it's a code block or span
        if ($this->shouldBeBlock($element, $code)) {
            // Code block detected, newlines will be added in parent
            $markdown .= '```' . $language . "\n" . $code . "\n" . '```';
        } else {
            // One line of code, wrapping it on one backtick, removing new lines
            $markdown .= '`' . \preg_replace('/\r\n|\r|\n/', '', $code) . '`';
        }

        return $markdown;
    }

    /**
     * @return string[]
     */
    public function getSupportedTags(): array
    {
        return ['code'];
    }

    /**
     * Decides whether the code is rendered as a fenced block.
     *
     * True when the element's parent is a <pre>, or when the code matches the
     * pattern below (a non-whitespace character followed by "` `").
     */
    private function shouldBeBlock(ElementInterface $element, string $code): bool
    {
        $parent = $element->getParent();
        if ($parent !== null && $parent->getTagName() === 'pre') {
            return true;
        }

        return \preg_match('/[^\s]` `/', $code) === 1;
    }
}

0 commit comments

Comments
 (0)