From 1e5f3903855ffb91d1aec1a0bfb7f0781c2832b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20=27Necoro=27=20Neumann?= Date: Wed, 5 Aug 2020 22:32:06 +0200 Subject: [PATCH 01/11] #151: Add atom namespaces to known namespaces --- internal/shared/extparser.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/shared/extparser.go b/internal/shared/extparser.go index 79c8d5ac..3706328a 100644 --- a/internal/shared/extparser.go +++ b/internal/shared/extparser.go @@ -121,6 +121,8 @@ func prefixForNamespace(space string, p *xpp.XMLPullParser) string { // These canonical prefixes override any prefixes used in the feed itself. var canonicalNamespaces = map[string]string{ "http://webns.net/mvcb/": "admin", + "http://www.w3.org/2005/Atom": "atom", + "http://purl.org/atom/ns#": "atom03", "http://purl.org/rss/1.0/modules/aggregation/": "ag", "http://purl.org/rss/1.0/modules/annotate/": "annotate", "http://media.tangent.org/rss/1.0/": "audio", From ad2f724d97c86f269f18e8e39a1f8435f568568d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20=27Necoro=27=20Neumann?= Date: Wed, 5 Aug 2020 23:27:42 +0200 Subject: [PATCH 02/11] #151: Atom parser: Avoid superfluous intermediate variables --- atom/parser.go | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/atom/parser.go b/atom/parser.go index 09081145..416bbf5b 100644 --- a/atom/parser.go +++ b/atom/parser.go @@ -202,10 +202,6 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) { } entry := &Entry{} - contributors := []*Person{} - authors := []*Person{} - categories := []*Category{} - links := []*Link{} extensions := ext.Extensions{} for { @@ -276,25 +272,25 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) { if err != nil { return nil, err } - contributors = append(contributors, result) + entry.Contributors = append(entry.Contributors, result) } else if name == "author" { result, err := ap.parsePerson("author", p) if err != nil { return nil, err } - authors = append(authors, result) + entry.Authors = append(entry.Authors, result) } else if name == "category" { result, err := ap.parseCategory(p) if err != nil { return nil, err } - categories = append(categories, result) + entry.Categories = append(entry.Categories, result) } else if name == "link" { result, err := ap.parseLink(p) if err != nil { return nil, err } - links = append(links, result) + entry.Links = append(entry.Links, result) } else if name == "published" || name == "issued" { result, err := ap.parseAtomText(p) @@ -322,22 +318,6 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) { } } - if len(categories) > 0 { - entry.Categories = categories - } - - if len(authors) > 0 { - entry.Authors = authors - } - - if len(links) > 0 { - entry.Links = links - } - - if len(contributors) > 0 { - entry.Contributors = contributors - } - if len(extensions) > 0 { entry.Extensions = extensions } From 0fb9c69a993e519921b0efa9b63e123883ea90d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20=27Necoro=27=20Neumann?= Date: Sat, 22 Aug 2020 16:54:10 +0200 Subject: [PATCH 03/11] #151: Test case with atom author --- .../rss/rss_channel_item_author_atom.json | 26 +++++++++++++++++++ .../rss/rss_channel_item_author_atom.xml | 10 +++++++ 2 files changed, 36 insertions(+) create mode 100644 testdata/parser/rss/rss_channel_item_author_atom.json create mode 100644 testdata/parser/rss/rss_channel_item_author_atom.xml diff --git a/testdata/parser/rss/rss_channel_item_author_atom.json b/testdata/parser/rss/rss_channel_item_author_atom.json new file mode 100644 index 00000000..06fe10af --- /dev/null +++ b/testdata/parser/rss/rss_channel_item_author_atom.json @@ -0,0 +1,26 @@ +{ + "items": [ + { + "extensions": { + "atom": { + "author": [ + { + "name": "author", + "value": "", + "attrs": null, + "children": null, + "parsed": { + "authors": [ + { + "name": "Item Author" + } + ] + } + } + ] + } + } + } + ], + "version": "2.0" +} diff --git a/testdata/parser/rss/rss_channel_item_author_atom.xml b/testdata/parser/rss/rss_channel_item_author_atom.xml new file mode 100644 index 00000000..f444b0cc --- /dev/null +++ b/testdata/parser/rss/rss_channel_item_author_atom.xml @@ -0,0 +1,10 @@ + + + + + Item Author + + + From 5251f76f692eca1e481f373e9df483b8b62cc22a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20=27Necoro=27=20Neumann?= Date: Sun, 5 Mar 2023 15:48:56 +0100 Subject: [PATCH 04/11] #151: Allow additional parsers for feed formats. Currently only atom is allowed as part of RSS --- atom/parser.go | 205 +++++++++++++++++++---------------- extensions/extensions.go | 1 + internal/shared/extparser.go | 33 +++++- parser.go | 14 ++- rss/parser.go | 12 +- rss/parser_test.go | 3 +- 6 files changed, 167 insertions(+), 101 deletions(-) diff --git a/atom/parser.go b/atom/parser.go index 416bbf5b..1840a719 100644 --- a/atom/parser.go +++ b/atom/parser.go @@ -21,6 +21,9 @@ var ( "uri": true, "url": true, // atom 0.3 } + + // No known explicit extension parsers for Atom, currently + emptyExtParsers = make(shared.ExtParsers) ) // Parser is an Atom Parser @@ -38,6 +41,14 @@ func (ap *Parser) Parse(feed io.Reader) (*Feed, error) { return ap.parseRoot(p) } +func (ap *Parser) ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error) { + entry := &Entry{} + if err := ap.parseEntryContent(p, entry); err != nil { + return nil, err + } + return entry, nil +} + func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) { if err := p.Expect(xpp.StartTag, "feed"); err != nil { return nil, err @@ -69,7 +80,7 @@ func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) { name := strings.ToLower(p.Name) if shared.IsExtension(p) { - e, err := shared.ParseExtension(extensions, p) + e, err := shared.ParseExtension(extensions, p, emptyExtParsers) if err != nil { return nil, err } @@ -215,103 +226,14 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) { } if tok == xpp.StartTag { - - name := strings.ToLower(p.Name) - if shared.IsExtension(p) { - e, err := shared.ParseExtension(extensions, p) + e, err := shared.ParseExtension(extensions, p, emptyExtParsers) if err != nil { return nil, err } extensions = e - } else if name == "title" { - result, err := ap.parseAtomText(p) - if err != nil { - return nil, err - } - entry.Title = result - } else if name == "id" { - result, err := ap.parseAtomText(p) - if err != nil { - return nil, err - } - entry.ID = result - } else if name == "rights" || - name == "copyright" { - result, err := ap.parseAtomText(p) - if err != nil { - return nil, err - } - entry.Rights = result - } else if name == "summary" { - result, err := ap.parseAtomText(p) - if err != nil { - return nil, err - } - entry.Summary = result - } else if name == "source" { - result, err := ap.parseSource(p) - if err != nil { - return nil, err - } - entry.Source = result - } else if name == "updated" || - name == "modified" { - result, err := ap.parseAtomText(p) - if err != nil { - return nil, err - } - entry.Updated = result - date, err := shared.ParseDate(result) - if err == nil { - utcDate := date.UTC() - entry.UpdatedParsed = &utcDate - } - } else if name == "contributor" { - result, err := ap.parsePerson("contributor", p) - if err != nil { - return nil, err - } - entry.Contributors = append(entry.Contributors, result) - } else if name == "author" { - result, err := ap.parsePerson("author", p) - if err != nil { - return nil, err - } - entry.Authors = append(entry.Authors, result) - } else if name == "category" { - result, err := ap.parseCategory(p) - if err != nil { - return nil, err - } - entry.Categories = append(entry.Categories, result) - } else if name == "link" { - result, err := ap.parseLink(p) - if err != nil { - return nil, err - } - entry.Links = append(entry.Links, result) - } else if name == "published" || - name == "issued" { - result, err := ap.parseAtomText(p) - if err != nil { - return nil, err - } - entry.Published = result - date, err := shared.ParseDate(result) - if err == nil { - utcDate := date.UTC() - entry.PublishedParsed = &utcDate - } - } else if name == "content" { - result, err := ap.parseContent(p) - if err != nil { - return nil, err - } - entry.Content = result } else { - err := p.Skip() - if err != nil { + if err := ap.parseEntryContent(p, entry); err != nil { return nil, err } } @@ -329,6 +251,103 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) { return entry, nil } +func (ap *Parser) parseEntryContent(p *xpp.XMLPullParser, entry *Entry) error { + name := strings.ToLower(p.Name) + + if name == "title" { + result, err := ap.parseAtomText(p) + if err != nil { + return err + } + entry.Title = result + } else if name == "id" { + result, err := ap.parseAtomText(p) + if err != nil { + return err + } + entry.ID = result + } else if name == "rights" || + name == "copyright" { + result, err := ap.parseAtomText(p) + if err != nil { + return err + } + entry.Rights = result + } else if name == "summary" { + result, err := ap.parseAtomText(p) + if err != nil { + return err + } + entry.Summary = result + } else if name == "source" { + result, err := ap.parseSource(p) + if err != nil { + return err + } + entry.Source = result + } else if name == "updated" || + name == "modified" { + result, err := ap.parseAtomText(p) + if err != nil { + return err + } + entry.Updated = result + date, err := shared.ParseDate(result) + if err == nil { + utcDate := date.UTC() + entry.UpdatedParsed = &utcDate + } + } else if name == "contributor" { + result, err := ap.parsePerson("contributor", p) + if err != nil { + return err + } + entry.Contributors = append(entry.Contributors, result) + } else if name == "author" { + result, err := ap.parsePerson("author", p) + if err != nil { + return err + } + entry.Authors = append(entry.Authors, result) + } else if name == "category" { + result, err := ap.parseCategory(p) + if err != nil { + return err + } + entry.Categories = append(entry.Categories, result) + } else if name == "link" { + result, err := ap.parseLink(p) + if err != nil { + return err + } + entry.Links = append(entry.Links, result) + } else if name == "published" || + name == "issued" { + result, err := ap.parseAtomText(p) + if err != nil { + return err + } + entry.Published = result + date, err := shared.ParseDate(result) + if err == nil { + utcDate := date.UTC() + entry.PublishedParsed = &utcDate + } + } else if name == "content" { + result, err := ap.parseContent(p) + if err != nil { + return err + } + entry.Content = result + } else { + err := p.Skip() + if err != nil { + return err + } + } + return nil +} + func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) { if err := p.Expect(xpp.StartTag, "source"); err != nil { @@ -358,7 +377,7 @@ func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) { name := strings.ToLower(p.Name) if shared.IsExtension(p) { - e, err := shared.ParseExtension(extensions, p) + e, err := shared.ParseExtension(extensions, p, emptyExtParsers) if err != nil { return nil, err } diff --git a/extensions/extensions.go b/extensions/extensions.go index 6c50d4aa..53021db8 100644 --- a/extensions/extensions.go +++ b/extensions/extensions.go @@ -12,6 +12,7 @@ type Extension struct { Value string `json:"value"` Attrs map[string]string `json:"attrs"` Children map[string][]Extension `json:"children"` + Parsed interface{} `json:"parsed,omitempty"` } func parseTextExtension(name string, extensions map[string][]Extension) (value string) { diff --git a/internal/shared/extparser.go b/internal/shared/extparser.go index 3706328a..9964a142 100644 --- a/internal/shared/extparser.go +++ b/internal/shared/extparser.go @@ -7,6 +7,12 @@ import ( "github.com/mmcdole/goxpp" ) +type ExtParser interface { + ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error) +} + +type ExtParsers map[string]ExtParser + // IsExtension returns whether or not the current // XML element is an extension element (if it has a // non empty prefix) @@ -22,10 +28,16 @@ func IsExtension(p *xpp.XMLPullParser) bool { // ParseExtension parses the current element of the // XMLPullParser as an extension element and updates // the extension map -func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) { +func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser, extParsers ExtParsers) (ext.Extensions, error) { prefix := prefixForNamespace(p.Space, p) - result, err := parseExtensionElement(p) + var result ext.Extension + var err error + if extParser, ok := extParsers[prefix]; ok { + result, err = parseExtensionFromParser(p, extParser) + } else { + result, err = parseExtensionElement(p) + } if err != nil { return nil, err } @@ -43,6 +55,23 @@ func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, er return fe, nil } +func parseExtensionFromParser(p *xpp.XMLPullParser, extParser ExtParser) (e ext.Extension, err error) { + if err = p.Expect(xpp.StartTag, "*"); err != nil { + return e, err + } + + e.Name = p.Name + if e.Parsed, err = extParser.ParseAsExtension(p); err != nil { + return e, err + } + + if err = p.Expect(xpp.EndTag, e.Name); err != nil { + return e, err + } + + return e, nil +} + func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) { if err = p.Expect(xpp.StartTag, "*"); err != nil { return e, err diff --git a/parser.go b/parser.go index 74d29ca0..2d498bde 100644 --- a/parser.go +++ b/parser.go @@ -11,6 +11,7 @@ import ( "github.com/mmcdole/gofeed/atom" "github.com/mmcdole/gofeed/json" + "github.com/mmcdole/gofeed/internal/shared" "github.com/mmcdole/gofeed/rss" ) @@ -155,8 +156,19 @@ func (f *Parser) parseAtomFeed(feed io.Reader) (*Feed, error) { return f.atomTrans().Translate(af) } +func (f *Parser) BuildRSSExtParsers() shared.ExtParsers { + extParsers := make(shared.ExtParsers, 3) + + // all possible atom variants + extParsers["atom"] = f.ap + extParsers["atom10"] = f.ap + extParsers["atom03"] = f.ap + + return extParsers +} + func (f *Parser) parseRSSFeed(feed io.Reader) (*Feed, error) { - rf, err := f.rp.Parse(feed) + rf, err := f.rp.Parse(feed, f.BuildRSSExtParsers()) if err != nil { return nil, err } diff --git a/rss/parser.go b/rss/parser.go index 61e3fbd5..11aef900 100644 --- a/rss/parser.go +++ b/rss/parser.go @@ -11,11 +11,14 @@ import ( ) // Parser is a RSS Parser -type Parser struct{} +type Parser struct { + extParsers shared.ExtParsers +} // Parse parses an xml feed into an rss.Feed -func (rp *Parser) Parse(feed io.Reader) (*Feed, error) { +func (rp *Parser) Parse(feed io.Reader, extParsers shared.ExtParsers) (*Feed, error) { p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel) + rp.extParsers = extParsers _, err := shared.FindRoot(p) if err != nil { @@ -141,7 +144,8 @@ func (rp *Parser) parseChannel(p *xpp.XMLPullParser) (rss *Feed, err error) { name := strings.ToLower(p.Name) if shared.IsExtension(p) { - ext, err := shared.ParseExtension(extensions, p) + + ext, err := shared.ParseExtension(extensions, p, rp.extParsers) if err != nil { return nil, err } @@ -338,7 +342,7 @@ func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) { name := strings.ToLower(p.Name) if shared.IsExtension(p) { - ext, err := shared.ParseExtension(extensions, p) + ext, err := shared.ParseExtension(extensions, p, rp.extParsers) if err != nil { return nil, err } diff --git a/rss/parser_test.go b/rss/parser_test.go index e46204c4..35f4872a 100644 --- a/rss/parser_test.go +++ b/rss/parser_test.go @@ -9,6 +9,7 @@ import ( "strings" "testing" + "github.com/mmcdole/gofeed" "github.com/mmcdole/gofeed/rss" "github.com/stretchr/testify/assert" ) @@ -27,7 +28,7 @@ func TestParser_Parse(t *testing.T) { // Parse actual feed fp := &rss.Parser{} - actual, _ := fp.Parse(bytes.NewReader(f)) + actual, _ := fp.Parse(bytes.NewReader(f), gofeed.NewParser().BuildRSSExtParsers()) // Get json encoded expected feed result ef := fmt.Sprintf("../testdata/parser/rss/%s.json", name) From 9eb41f5ee847ffe33b6011494a714c45bcc4c839 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20=27Necoro=27=20Neumann?= Date: Sat, 22 Aug 2020 17:42:17 +0200 Subject: [PATCH 05/11] #151: To make the test work, the parsed extensions have to take a json roundtrip --- rss/parser_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/rss/parser_test.go b/rss/parser_test.go index 35f4872a..22ef361c 100644 --- a/rss/parser_test.go +++ b/rss/parser_test.go @@ -30,6 +30,15 @@ func TestParser_Parse(t *testing.T) { fp := &rss.Parser{} actual, _ := fp.Parse(bytes.NewReader(f), gofeed.NewParser().BuildRSSExtParsers()) + // the `Parsed` part of extensions is not correctly unmarshalled from JSON + // workaround: move the actual extensions through a round of json marshalling so that we get the same + for _, i := range actual.Items { + if len(i.Extensions) > 0 { + b, _ := json.Marshal(i.Extensions) + json.Unmarshal(b, &i.Extensions) + } + } + // Get json encoded expected feed result ef := fmt.Sprintf("../testdata/parser/rss/%s.json", name) e, _ := ioutil.ReadFile(ef) From 96ac374e68e71d8878329434279fbcefd40c7e3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20=27Necoro=27=20Neumann?= Date: Wed, 5 Aug 2020 22:32:59 +0200 Subject: [PATCH 06/11] Introduce atomExtensionsWithKey --- translator.go | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/translator.go b/translator.go index 7fe02df8..eab95816 100644 --- a/translator.go +++ b/translator.go @@ -105,16 +105,13 @@ func (t *DefaultRSSTranslator) translateFeedLink(rss *rss.Feed) (link string) { } func (t *DefaultRSSTranslator) translateFeedFeedLink(rss *rss.Feed) (link string) { - atomExtensions := t.extensionsForKeys([]string{"atom", "atom10", "atom03"}, rss.Extensions) - for _, ex := range atomExtensions { - if links, ok := ex["link"]; ok { - for _, l := range links { - if l.Attrs["rel"] == "self" { - link = l.Attrs["href"] - } - } + t.atomExtensionsWithKey(rss, "link", func(l ext.Extension) bool { + if l.Attrs["rel"] == "self" { + link = l.Attrs["href"] + return true } - } + return false + }) return } @@ -467,6 +464,19 @@ func (t *DefaultRSSTranslator) extensionsForKeys(keys []string, extensions ext.E return } +func (t *DefaultRSSTranslator) atomExtensionsWithKey(rss *rss.Feed, tag string, f func(ext.Extension) bool) { + atomExtensions := t.extensionsForKeys([]string{"atom", "atom10", "atom03"}, rss.Extensions) + for _, ex := range atomExtensions { + if exts, ok := ex[tag]; ok { + for _, e := range exts { + if f(e) { + return + } + } + } + } +} + func (t *DefaultRSSTranslator) firstEntry(entries []string) (value string) { if entries == nil { return From 8cab1616169c67d8a2972725bb44f22ee219de9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20=27Necoro=27=20Neumann?= Date: Sat, 22 Aug 2020 20:37:44 +0200 Subject: [PATCH 07/11] #151: Disable extension support for channels --- rss/parser.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rss/parser.go b/rss/parser.go index 11aef900..3079426a 100644 --- a/rss/parser.go +++ b/rss/parser.go @@ -144,8 +144,8 @@ func (rp *Parser) parseChannel(p *xpp.XMLPullParser) (rss *Feed, err error) { name := strings.ToLower(p.Name) if shared.IsExtension(p) { - - ext, err := shared.ParseExtension(extensions, p, rp.extParsers) + // TODO: Currently no extParser support for channels + ext, err := shared.ParseExtension(extensions, p, make(shared.ExtParsers)) if err != nil { return nil, err } From 69a2fc6d10074d7baa8c0c0461b63715d29058a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20=27Necoro=27=20Neumann?= Date: Sat, 22 Aug 2020 20:40:36 +0200 Subject: [PATCH 08/11] #151: Translation support for parsed extensions --- extensions/extensions.go | 4 ++++ rss/feed.go | 8 ++++++++ translator.go | 20 +++++++++++++++++--- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/extensions/extensions.go b/extensions/extensions.go index 53021db8..eef62999 100644 --- a/extensions/extensions.go +++ b/extensions/extensions.go @@ -15,6 +15,10 @@ type Extension struct { Parsed interface{} `json:"parsed,omitempty"` } +type Extendable interface { + GetExtensions() Extensions +} + func parseTextExtension(name string, extensions map[string][]Extension) (value string) { if extensions == nil { return diff --git a/rss/feed.go b/rss/feed.go index 394f3d13..cdc07906 100644 --- a/rss/feed.go +++ b/rss/feed.go @@ -38,6 +38,10 @@ type Feed struct { Version string `json:"version"` } +func (f Feed) GetExtensions() ext.Extensions { + return f.Extensions +} + func (f Feed) String() string { json, _ := json.MarshalIndent(f, "", " ") return string(json) @@ -65,6 +69,10 @@ type Item struct { Custom map[string]string `json:"custom,omitempty"` } +func (i Item) GetExtensions() ext.Extensions { + return i.Extensions +} + // Image is an image that represents the feed type Image struct { URL string `json:"url,omitempty"` diff --git a/translator.go b/translator.go index eab95816..11fcd66f 100644 --- a/translator.go +++ b/translator.go @@ -24,7 +24,9 @@ type Translator interface { // This default implementation defines a set of // mapping rules between rss.Feed -> Feed // for each of the fields in Feed. -type DefaultRSSTranslator struct{} +type DefaultRSSTranslator struct{ + atomTranslator DefaultAtomTranslator +} // Translate converts an RSS feed into the universal // feed type. @@ -359,6 +361,8 @@ func (t *DefaultRSSTranslator) translateItemAuthor(rssItem *rss.Item) (author *P author = &Person{} author.Name = name author.Email = address + } else if authorVal, ok := t.hasAtomExtensionsForKey(rssItem, "author"); ok { + author = t.atomTranslator.translateItemAuthor(authorVal) } else if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Author != nil { dcAuthor := t.firstEntry(rssItem.DublinCoreExt.Author) name, address := shared.ParseNameAddress(dcAuthor) @@ -464,8 +468,8 @@ func (t *DefaultRSSTranslator) extensionsForKeys(keys []string, extensions ext.E return } -func (t *DefaultRSSTranslator) atomExtensionsWithKey(rss *rss.Feed, tag string, f func(ext.Extension) bool) { - atomExtensions := t.extensionsForKeys([]string{"atom", "atom10", "atom03"}, rss.Extensions) +func (t *DefaultRSSTranslator) atomExtensionsWithKey(rss ext.Extendable, tag string, f func(ext.Extension) bool) { + atomExtensions := t.extensionsForKeys([]string{"atom", "atom10", "atom03"}, rss.GetExtensions()) for _, ex := range atomExtensions { if exts, ok := ex[tag]; ok { for _, e := range exts { @@ -477,6 +481,16 @@ func (t *DefaultRSSTranslator) atomExtensionsWithKey(rss *rss.Feed, tag string, } } +func (t *DefaultRSSTranslator) hasAtomExtensionsForKey(rss ext.Extendable, tag string) (entry *atom.Entry, ok bool) { + t.atomExtensionsWithKey(rss, tag, func(extension ext.Extension) bool { + if extension.Parsed != nil { + entry, ok = extension.Parsed.(*atom.Entry) + } + return ok + }) + return +} + func (t *DefaultRSSTranslator) firstEntry(entries []string) (value string) { if entries == nil { return From 03c429996a2d9fd8a857af8ff50de1290bb50985 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20=27Necoro=27=20Neumann?= Date: Sat, 22 Aug 2020 20:41:11 +0200 Subject: [PATCH 09/11] #151 Testcase for atom author in RSS --- ...author_-_rss_channel_item_author_atom.json | 33 +++++++++++++++++++ ..._author_-_rss_channel_item_author_atom.xml | 10 ++++++ translator_test.go | 11 ++++++- 3 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 testdata/translator/rss/feed_item_author_-_rss_channel_item_author_atom.json create mode 100644 testdata/translator/rss/feed_item_author_-_rss_channel_item_author_atom.xml diff --git a/testdata/translator/rss/feed_item_author_-_rss_channel_item_author_atom.json b/testdata/translator/rss/feed_item_author_-_rss_channel_item_author_atom.json new file mode 100644 index 00000000..ff2dbe5a --- /dev/null +++ b/testdata/translator/rss/feed_item_author_-_rss_channel_item_author_atom.json @@ -0,0 +1,33 @@ +{ + "items": [ + { + "author": { + "name": "Item Author" + }, + "authors": [ + { + "name": "Item Author" + } + ], + "extensions": { + "atom": { + "author": [ + { + "name": "author", + "value": "", + "parsed": { + "authors": [ + { + "name": "Item Author" + } + ] + } + } + ] + } + } + } + ], + "feedType": "rss", + "feedVersion": "2.0" +} diff --git a/testdata/translator/rss/feed_item_author_-_rss_channel_item_author_atom.xml b/testdata/translator/rss/feed_item_author_-_rss_channel_item_author_atom.xml new file mode 100644 index 00000000..f444b0cc --- /dev/null +++ b/testdata/translator/rss/feed_item_author_-_rss_channel_item_author_atom.xml @@ -0,0 +1,10 @@ + + + + + Item Author + + + diff --git a/translator_test.go b/translator_test.go index bbd76d69..e10fa945 100644 --- a/translator_test.go +++ b/translator_test.go @@ -32,9 +32,18 @@ func TestDefaultRSSTranslator_Translate(t *testing.T) { // Parse actual feed translator := &gofeed.DefaultRSSTranslator{} fp := &rss.Parser{} - rssFeed, _ := fp.Parse(f) + rssFeed, _ := fp.Parse(f, gofeed.NewParser().BuildRSSExtParsers()) actual, _ := translator.Translate(rssFeed) + // the `Parsed` part of extensions is not correctly unmarshalled from JSON + // workaround: move the actual extensions through a round of json marshalling so that we get the same + for _, i := range actual.Items { + if len(i.Extensions) > 0 { + b, _ := jsonEncoding.Marshal(i.Extensions) + jsonEncoding.Unmarshal(b, &i.Extensions) + } + } + // Get json encoded expected feed result ef := fmt.Sprintf("testdata/translator/rss/%s.json", name) e, _ := ioutil.ReadFile(ef) From 719f3c52bcaf505ff55f6ea2e975533eb72ecf36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20=27Necoro=27=20Neumann?= Date: Sat, 22 Aug 2020 21:21:28 +0200 Subject: [PATCH 10/11] #151: Support for atom update date --- ...dated_-_rss_channel_item_updated_atom.json | 24 +++++++++++++++++++ ...pdated_-_rss_channel_item_updated_atom.xml | 10 ++++++++ translator.go | 12 ++++++---- 3 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 testdata/translator/rss/feed_item_updated_-_rss_channel_item_updated_atom.json create mode 100644 testdata/translator/rss/feed_item_updated_-_rss_channel_item_updated_atom.xml diff --git a/testdata/translator/rss/feed_item_updated_-_rss_channel_item_updated_atom.json b/testdata/translator/rss/feed_item_updated_-_rss_channel_item_updated_atom.json new file mode 100644 index 00000000..256bea65 --- /dev/null +++ b/testdata/translator/rss/feed_item_updated_-_rss_channel_item_updated_atom.json @@ -0,0 +1,24 @@ +{ + "items": [ + { + "updated": "Thu, 01 Jan 2004 19:48:21 GMT", + "updatedParsed": "2004-01-01T19:48:21Z", + "extensions": { + "atom": { + "updated": [ + { + "name": "updated", + "value": "", + "parsed": { + "updated": "Thu, 01 Jan 2004 19:48:21 GMT", + "updatedParsed": "2004-01-01T19:48:21Z" + } + } + ] + } + } + } + ], + "feedType": "rss", + "feedVersion": "2.0" +} diff --git a/testdata/translator/rss/feed_item_updated_-_rss_channel_item_updated_atom.xml b/testdata/translator/rss/feed_item_updated_-_rss_channel_item_updated_atom.xml new file mode 100644 index 00000000..8706e4d6 --- /dev/null +++ b/testdata/translator/rss/feed_item_updated_-_rss_channel_item_updated_atom.xml @@ -0,0 +1,10 @@ + + + + + Thu, 01 Jan 2004 19:48:21 GMT + + + diff --git a/translator.go b/translator.go index 11fcd66f..9e5d16f3 100644 --- a/translator.go +++ b/translator.go @@ -71,6 +71,8 @@ func (t *DefaultRSSTranslator) translateFeedItem(rssItem *rss.Item) (item *Item) item.Links = t.translateItemLinks(rssItem) item.Published = t.translateItemPublished(rssItem) item.PublishedParsed = t.translateItemPublishedParsed(rssItem) + item.Updated = t.translateItemUpdated(rssItem) + item.UpdatedParsed = t.translateItemUpdatedParsed(rssItem) item.Author = t.translateItemAuthor(rssItem) item.Authors = t.translateItemAuthors(rssItem) item.GUID = t.translateItemGUID(rssItem) @@ -316,18 +318,20 @@ func (t *DefaultRSSTranslator) translateItemLinks(rssItem *rss.Item) (links []st } func (t *DefaultRSSTranslator) translateItemUpdated(rssItem *rss.Item) (updated string) { - if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Date != nil { + if updatedVal, ok := t.hasAtomExtensionsForKey(rssItem, "updated"); ok { + updated = t.atomTranslator.translateItemUpdated(updatedVal) + } else if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Date != nil { updated = t.firstEntry(rssItem.DublinCoreExt.Date) } return updated } func (t *DefaultRSSTranslator) translateItemUpdatedParsed(rssItem *rss.Item) (updated *time.Time) { - if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Date != nil { - updatedText := t.firstEntry(rssItem.DublinCoreExt.Date) + if updatedText := t.translateItemUpdated(rssItem); updatedText != "" { updatedDate, err := shared.ParseDate(updatedText) if err == nil { - updated = &updatedDate + utcDate := updatedDate.UTC() + updated = &utcDate } } return From ad0267e149f797b12653bf032be44d92b1c2a8ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20=27Necoro=27=20Neumann?= Date: Sun, 6 Sep 2020 17:51:58 +0200 Subject: [PATCH 11/11] Fixed call in `ftest` --- cmd/ftest/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/ftest/main.go b/cmd/ftest/main.go index 618e9cc5..fbf4eb20 100644 --- a/cmd/ftest/main.go +++ b/cmd/ftest/main.go @@ -44,7 +44,7 @@ func main() { if strings.EqualFold(feedType, "rss") || strings.EqualFold(feedType, "r") { p := rss.Parser{} - feed, err = p.Parse(strings.NewReader(fc)) + feed, err = p.Parse(strings.NewReader(fc), gofeed.NewParser().BuildRSSExtParsers()) } else if strings.EqualFold(feedType, "atom") || strings.EqualFold(feedType, "a") { p := atom.Parser{}