Skip to content

Commit 2008088

Browse files
committed
RSS: Copy $summary to $description on Atom feeds
Otherwise, on Atom feeds, $description would keep feedparser's default value, which is the unescaped <content>; however, $description is the only usable template variable on RSS feeds.
1 parent 04f0d70 commit 2008088

File tree

2 files changed

+65
-20
lines changed

2 files changed

+65
-20
lines changed

plugins/RSS/plugin.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,19 @@ def format_entry(self, network, channel, feed, entry, is_announce):
526526
if isinstance(item, dict) and 'value' in item:
527527
value = item['value']
528528
kwargs[key] = value
529+
530+
for key in ('summary', 'title'):
531+
detail = kwargs.get('%s_detail' % key)
532+
if isinstance(detail, dict) and detail.get('type') in \
533+
('text/html', 'application/xhtml+xml'):
534+
kwargs[key] = utils.web.htmlToText(detail['value'])
535+
536+
if 'description' not in kwargs and kwargs[key]:
537+
kwargs['description'] = kwargs[key]
538+
539+
if 'description' not in kwargs and kwargs.get('content'):
540+
kwargs['description'] = kwargs['content']
541+
529542
s = string.Template(template).safe_substitute(entry, **kwargs, date=date)
530543
return self._normalize_entry(s)
531544

plugins/RSS/test.py

Lines changed: 52 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -359,97 +359,129 @@ def testDescription(self, mock):
359359
'On the other hand, the refractor\'s')
360360

361361
@mock_urllib
362-
def testContentHtmlOnly(self, mock):
362+
def testAtomContentHtmlOnly(self, mock):
363363
timeFastForward(1.1)
364-
with conf.supybot.plugins.RSS.format.context('$content'):
365-
mock._data = """
364+
mock._data = """
366365
<?xml version="1.0" encoding="UTF-8"?>
367366
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xml:lang="en-US">
368367
<title>Recent Commits to anope:2.0</title>
369368
<updated>2023-10-04T16:14:39Z</updated>
370369
<entry>
371-
<title>title with &lt;pre&gt;HTML&lt;pre&gt;</title>
370+
<title>title with &lt;pre&gt;HTML&lt;/pre&gt;</title>
372371
<updated>2023-10-04T16:14:39Z</updated>
373372
<content type="html">
374-
content with &lt;pre&gt;HTML&lt;pre&gt;
373+
content with &lt;pre&gt;HTML&lt;/pre&gt;
375374
</content>
376375
</entry>
377376
</feed>"""
377+
with conf.supybot.plugins.RSS.format.context('$content'):
378+
self.assertRegexp('rss https://example.org',
379+
'content with HTML')
380+
with conf.supybot.plugins.RSS.format.context('$description'):
378381
self.assertRegexp('rss https://example.org',
379382
'content with HTML')
380383

381384
@mock_urllib
382-
def testContentXhtmlOnly(self, mock):
385+
def testAtomContentXhtmlOnly(self, mock):
383386
timeFastForward(1.1)
384-
with conf.supybot.plugins.RSS.format.context('$content'):
385-
mock._data = """
387+
mock._data = """
386388
<?xml version="1.0" encoding="UTF-8"?>
387389
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xml:lang="en-US">
388390
<title>Recent Commits to anope:2.0</title>
389391
<updated>2023-10-04T16:14:39Z</updated>
390392
<entry>
391-
<title>title with &lt;pre&gt;HTML&lt;pre&gt;</title>
393+
<title>title with &lt;pre&gt;HTML&lt;/pre&gt;</title>
392394
<updated>2023-10-04T16:14:39Z</updated>
393395
<content type="xhtml">
394396
<div xmlns="http://www.w3.org/1999/xhtml">
395-
content with <pre>XHTML<pre>
397+
content with <pre>XHTML</pre>
396398
</div>
397399
</content>
398400
</entry>
399401
</feed>"""
402+
with conf.supybot.plugins.RSS.format.context('$content'):
403+
self.assertRegexp('rss https://example.org',
404+
'content with XHTML')
405+
with conf.supybot.plugins.RSS.format.context('$description'):
400406
self.assertRegexp('rss https://example.org',
401407
'content with XHTML')
402408

403409
@mock_urllib
404-
def testContentHtmlAndPlaintext(self, mock):
410+
def testAtomContentHtmlAndPlaintext(self, mock):
405411
timeFastForward(1.1)
406-
with conf.supybot.plugins.RSS.format.context('$content'):
407-
mock._data = """
412+
mock._data = """
408413
<?xml version="1.0" encoding="UTF-8"?>
409414
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xml:lang="en-US">
410415
<title>Recent Commits to anope:2.0</title>
411416
<updated>2023-10-04T16:14:39Z</updated>
412417
<entry>
413-
<title>title with &lt;pre&gt;HTML&lt;pre&gt;</title>
418+
<title>title with &lt;pre&gt;HTML&lt;/pre&gt;</title>
414419
<updated>2023-10-04T16:14:39Z</updated>
415420
<!-- Atom spec says multiple contents is invalid, feedparser says it's not.
416421
I like having the option, so let's make sure we support it. -->
417422
<content type="html">
418-
content with &lt;pre&gt;HTML&lt;pre&gt;
423+
content with &lt;pre&gt;HTML&lt;/pre&gt;
419424
</content>
420425
<content type="text">
421426
content with plaintext
422427
</content>
423428
</entry>
424429
</feed>"""
430+
with conf.supybot.plugins.RSS.format.context('$content'):
431+
self.assertRegexp('rss https://example.org',
432+
'content with plaintext')
433+
with conf.supybot.plugins.RSS.format.context('$description'):
425434
self.assertRegexp('rss https://example.org',
426435
'content with plaintext')
427436

428437
@mock_urllib
429-
def testContentPlaintextAndHtml(self, mock):
438+
def testAtomContentPlaintextAndHtml(self, mock):
430439
timeFastForward(1.1)
431-
with conf.supybot.plugins.RSS.format.context('$content'):
432-
mock._data = """
440+
mock._data = """
433441
<?xml version="1.0" encoding="UTF-8"?>
434442
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xml:lang="en-US">
435443
<title>Recent Commits to anope:2.0</title>
436444
<updated>2023-10-04T16:14:39Z</updated>
437445
<entry>
438-
<title>title with &lt;pre&gt;HTML&lt;pre&gt;</title>
446+
<title>title with &lt;pre&gt;HTML&lt;/pre&gt;</title>
439447
<updated>2023-10-04T16:14:39Z</updated>
440448
<!-- Atom spec says multiple contents is invalid, feedparser says it's not.
441449
I like having the option, so let's make sure we support it. -->
442450
<content type="text">
443451
content with plaintext
444452
</content>
445453
<content type="html">
446-
content with &lt;pre&gt;HTML&lt;pre&gt;
454+
content with &lt;pre&gt;HTML&lt;/pre&gt;
447455
</content>
448456
</entry>
449457
</feed>"""
458+
with conf.supybot.plugins.RSS.format.context('$content'):
459+
self.assertRegexp('rss https://example.org',
460+
'content with plaintext')
461+
with conf.supybot.plugins.RSS.format.context('$description'):
450462
self.assertRegexp('rss https://example.org',
451463
'content with plaintext')
452464

465+
@mock_urllib
466+
def testRssDescriptionHtml(self, mock):
467+
timeFastForward(1.1)
468+
mock._data = """
469+
<?xml version="1.0" encoding="utf-8"?>
470+
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:og="http://ogp.me/ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:sioc="http://rdfs.org/sioc/ns#" xmlns:sioct="http://rdfs.org/sioc/types#" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:xsd="http://www.w3.org/2001/XMLSchema#" version="2.0">
471+
<channel>
472+
<title>feed title</title>
473+
<description/>
474+
<language>en</language>
475+
<item>
476+
<title>title with &lt;pre&gt;HTML&lt;/pre&gt;</title>
477+
<description>description with &lt;pre&gt;HTML&lt;/pre&gt;</description>
478+
</item>
479+
</channel>
480+
</feed>"""
481+
with conf.supybot.plugins.RSS.format.context('$description'):
482+
self.assertRegexp('rss https://example.org',
483+
'description with HTML')
484+
453485
@mock_urllib
454486
def testFeedAttribute(self, mock):
455487
timeFastForward(1.1)

0 commit comments

Comments
 (0)