Skip to content

Commit 6018a29

Browse files
committed
#151: Allow additional parsers for embedded feed formats. Currently, only Atom is supported as an embedded format within RSS.
1 parent eb2bc99 commit 6018a29

File tree

6 files changed

+166
-100
lines changed

6 files changed

+166
-100
lines changed

atom/parser.go

Lines changed: 113 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ var (
3030
"src": true,
3131
"uri": true,
3232
}
33+
34+
// Atom currently has no known explicit extension parsers.
35+
emptyExtParsers = make(shared.ExtParsers)
3336
)
3437

3538
// Parser is an Atom Parser
@@ -50,6 +53,15 @@ func (ap *Parser) Parse(feed io.Reader) (*Feed, error) {
5053
return ap.parseRoot(p)
5154
}
5255

56+
func (ap *Parser) ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error) {
57+
ap.base = &shared.XMLBase{URIAttrs: atomURIAttrs} // TODO: do we need the surrounding base for the urlstack?
58+
entry := &Entry{}
59+
if err := ap.parseEntryContent(p, entry); err != nil {
60+
return nil, err
61+
}
62+
return entry, nil
63+
}
64+
5365
func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
5466
if err := p.Expect(xpp.StartTag, "feed"); err != nil {
5567
return nil, err
@@ -81,7 +93,7 @@ func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
8193
name := strings.ToLower(p.Name)
8294

8395
if shared.IsExtension(p) {
84-
e, err := shared.ParseExtension(extensions, p)
96+
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
8597
if err != nil {
8698
return nil, err
8799
}
@@ -227,103 +239,14 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
227239
}
228240

229241
if tok == xpp.StartTag {
230-
231-
name := strings.ToLower(p.Name)
232-
233242
if shared.IsExtension(p) {
234-
e, err := shared.ParseExtension(extensions, p)
243+
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
235244
if err != nil {
236245
return nil, err
237246
}
238247
extensions = e
239-
} else if name == "title" {
240-
result, err := ap.parseAtomText(p)
241-
if err != nil {
242-
return nil, err
243-
}
244-
entry.Title = result
245-
} else if name == "id" {
246-
result, err := ap.parseAtomText(p)
247-
if err != nil {
248-
return nil, err
249-
}
250-
entry.ID = result
251-
} else if name == "rights" ||
252-
name == "copyright" {
253-
result, err := ap.parseAtomText(p)
254-
if err != nil {
255-
return nil, err
256-
}
257-
entry.Rights = result
258-
} else if name == "summary" {
259-
result, err := ap.parseAtomText(p)
260-
if err != nil {
261-
return nil, err
262-
}
263-
entry.Summary = result
264-
} else if name == "source" {
265-
result, err := ap.parseSource(p)
266-
if err != nil {
267-
return nil, err
268-
}
269-
entry.Source = result
270-
} else if name == "updated" ||
271-
name == "modified" {
272-
result, err := ap.parseAtomText(p)
273-
if err != nil {
274-
return nil, err
275-
}
276-
entry.Updated = result
277-
date, err := shared.ParseDate(result)
278-
if err == nil {
279-
utcDate := date.UTC()
280-
entry.UpdatedParsed = &utcDate
281-
}
282-
} else if name == "contributor" {
283-
result, err := ap.parsePerson("contributor", p)
284-
if err != nil {
285-
return nil, err
286-
}
287-
entry.Contributors = append(entry.Contributors, result)
288-
} else if name == "author" {
289-
result, err := ap.parsePerson("author", p)
290-
if err != nil {
291-
return nil, err
292-
}
293-
entry.Authors = append(entry.Authors, result)
294-
} else if name == "category" {
295-
result, err := ap.parseCategory(p)
296-
if err != nil {
297-
return nil, err
298-
}
299-
entry.Categories = append(entry.Categories, result)
300-
} else if name == "link" {
301-
result, err := ap.parseLink(p)
302-
if err != nil {
303-
return nil, err
304-
}
305-
entry.Links = append(entry.Links, result)
306-
} else if name == "published" ||
307-
name == "issued" {
308-
result, err := ap.parseAtomText(p)
309-
if err != nil {
310-
return nil, err
311-
}
312-
entry.Published = result
313-
date, err := shared.ParseDate(result)
314-
if err == nil {
315-
utcDate := date.UTC()
316-
entry.PublishedParsed = &utcDate
317-
}
318-
} else if name == "content" {
319-
result, err := ap.parseContent(p)
320-
if err != nil {
321-
return nil, err
322-
}
323-
entry.Content = result
324248
} else {
325-
err := p.Skip()
326-
if err != nil {
249+
if err := ap.parseEntryContent(p, entry); err != nil {
327250
return nil, err
328251
}
329252
}
@@ -341,6 +264,103 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
341264
return entry, nil
342265
}
343266

267+
func (ap *Parser) parseEntryContent(p *xpp.XMLPullParser, entry *Entry) error {
268+
name := strings.ToLower(p.Name)
269+
270+
if name == "title" {
271+
result, err := ap.parseAtomText(p)
272+
if err != nil {
273+
return err
274+
}
275+
entry.Title = result
276+
} else if name == "id" {
277+
result, err := ap.parseAtomText(p)
278+
if err != nil {
279+
return err
280+
}
281+
entry.ID = result
282+
} else if name == "rights" ||
283+
name == "copyright" {
284+
result, err := ap.parseAtomText(p)
285+
if err != nil {
286+
return err
287+
}
288+
entry.Rights = result
289+
} else if name == "summary" {
290+
result, err := ap.parseAtomText(p)
291+
if err != nil {
292+
return err
293+
}
294+
entry.Summary = result
295+
} else if name == "source" {
296+
result, err := ap.parseSource(p)
297+
if err != nil {
298+
return err
299+
}
300+
entry.Source = result
301+
} else if name == "updated" ||
302+
name == "modified" {
303+
result, err := ap.parseAtomText(p)
304+
if err != nil {
305+
return err
306+
}
307+
entry.Updated = result
308+
date, err := shared.ParseDate(result)
309+
if err == nil {
310+
utcDate := date.UTC()
311+
entry.UpdatedParsed = &utcDate
312+
}
313+
} else if name == "contributor" {
314+
result, err := ap.parsePerson("contributor", p)
315+
if err != nil {
316+
return err
317+
}
318+
entry.Contributors = append(entry.Contributors, result)
319+
} else if name == "author" {
320+
result, err := ap.parsePerson("author", p)
321+
if err != nil {
322+
return err
323+
}
324+
entry.Authors = append(entry.Authors, result)
325+
} else if name == "category" {
326+
result, err := ap.parseCategory(p)
327+
if err != nil {
328+
return err
329+
}
330+
entry.Categories = append(entry.Categories, result)
331+
} else if name == "link" {
332+
result, err := ap.parseLink(p)
333+
if err != nil {
334+
return err
335+
}
336+
entry.Links = append(entry.Links, result)
337+
} else if name == "published" ||
338+
name == "issued" {
339+
result, err := ap.parseAtomText(p)
340+
if err != nil {
341+
return err
342+
}
343+
entry.Published = result
344+
date, err := shared.ParseDate(result)
345+
if err == nil {
346+
utcDate := date.UTC()
347+
entry.PublishedParsed = &utcDate
348+
}
349+
} else if name == "content" {
350+
result, err := ap.parseContent(p)
351+
if err != nil {
352+
return err
353+
}
354+
entry.Content = result
355+
} else {
356+
err := p.Skip()
357+
if err != nil {
358+
return err
359+
}
360+
}
361+
return nil
362+
}
363+
344364
func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {
345365

346366
if err := p.Expect(xpp.StartTag, "source"); err != nil {
@@ -370,7 +390,7 @@ func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {
370390
name := strings.ToLower(p.Name)
371391

372392
if shared.IsExtension(p) {
373-
e, err := shared.ParseExtension(extensions, p)
393+
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
374394
if err != nil {
375395
return nil, err
376396
}

extensions/extensions.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ type Extension struct {
1212
Value string `json:"value"`
1313
Attrs map[string]string `json:"attrs"`
1414
Children map[string][]Extension `json:"children"`
15+
Parsed interface{} `json:"parsed,omitempty"`
1516
}
1617

1718
func parseTextExtension(name string, extensions map[string][]Extension) (value string) {

internal/shared/extparser.go

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ import (
77
"github.com/mmcdole/goxpp"
88
)
99

10+
type ExtParser interface {
11+
ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error)
12+
}
13+
14+
type ExtParsers map[string]ExtParser
15+
1016
// IsExtension returns whether or not the current
1117
// XML element is an extension element (if it has a
1218
// non empty prefix)
@@ -22,10 +28,16 @@ func IsExtension(p *xpp.XMLPullParser) bool {
2228
// ParseExtension parses the current element of the
2329
// XMLPullParser as an extension element and updates
2430
// the extension map
25-
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) {
31+
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser, extParsers ExtParsers) (ext.Extensions, error) {
2632
prefix := prefixForNamespace(p.Space, p)
2733

28-
result, err := parseExtensionElement(p)
34+
var result ext.Extension
35+
var err error
36+
if extParser, ok := extParsers[prefix]; ok {
37+
result, err = parseExtensionFromParser(p, extParser)
38+
} else {
39+
result, err = parseExtensionElement(p)
40+
}
2941
if err != nil {
3042
return nil, err
3143
}
@@ -43,6 +55,23 @@ func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, er
4355
return fe, nil
4456
}
4557

58+
func parseExtensionFromParser(p *xpp.XMLPullParser, extParser ExtParser) (e ext.Extension, err error) {
59+
if err = p.Expect(xpp.StartTag, "*"); err != nil {
60+
return e, err
61+
}
62+
63+
e.Name = p.Name
64+
if e.Parsed, err = extParser.ParseAsExtension(p); err != nil {
65+
return e, err
66+
}
67+
68+
if err = p.Expect(xpp.EndTag, e.Name); err != nil {
69+
return e, err
70+
}
71+
72+
return e, nil
73+
}
74+
4675
func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) {
4776
if err = p.Expect(xpp.StartTag, "*"); err != nil {
4877
return e, err

parser.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
"github.com/mmcdole/gofeed/atom"
1313
"github.com/mmcdole/gofeed/json"
14+
"github.com/mmcdole/gofeed/internal/shared"
1415
"github.com/mmcdole/gofeed/rss"
1516
)
1617

@@ -139,8 +140,19 @@ func (f *Parser) parseAtomFeed(feed io.Reader) (*Feed, error) {
139140
return f.atomTrans().Translate(af)
140141
}
141142

143+
func (f *Parser) BuildRSSExtParsers() shared.ExtParsers {
144+
extParsers := make(shared.ExtParsers, 3)
145+
146+
// all possible atom variants
147+
extParsers["atom"] = f.ap
148+
extParsers["atom10"] = f.ap
149+
extParsers["atom03"] = f.ap
150+
151+
return extParsers
152+
}
153+
142154
func (f *Parser) parseRSSFeed(feed io.Reader) (*Feed, error) {
143-
rf, err := f.rp.Parse(feed)
155+
rf, err := f.rp.Parse(feed, f.BuildRSSExtParsers())
144156
if err != nil {
145157
return nil, err
146158
}

rss/parser.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@ import (
1313
// Parser is a RSS Parser
1414
type Parser struct {
1515
base *shared.XMLBase
16+
extParsers shared.ExtParsers
1617
}
1718

1819
// Parse parses an xml feed into an rss.Feed
19-
func (rp *Parser) Parse(feed io.Reader) (*Feed, error) {
20+
func (rp *Parser) Parse(feed io.Reader, extParsers shared.ExtParsers) (*Feed, error) {
2021
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)
2122
rp.base = &shared.XMLBase{}
23+
rp.extParsers = extParsers
2224

2325
_, err := rp.base.FindRoot(p)
2426
if err != nil {
@@ -144,7 +146,8 @@ func (rp *Parser) parseChannel(p *xpp.XMLPullParser) (rss *Feed, err error) {
144146
name := strings.ToLower(p.Name)
145147

146148
if shared.IsExtension(p) {
147-
ext, err := shared.ParseExtension(extensions, p)
149+
150+
ext, err := shared.ParseExtension(extensions, p, rp.extParsers)
148151
if err != nil {
149152
return nil, err
150153
}
@@ -335,7 +338,7 @@ func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) {
335338
name := strings.ToLower(p.Name)
336339

337340
if shared.IsExtension(p) {
338-
ext, err := shared.ParseExtension(extensions, p)
341+
ext, err := shared.ParseExtension(extensions, p, rp.extParsers)
339342
if err != nil {
340343
return nil, err
341344
}

rss/parser_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"strings"
1010
"testing"
1111

12+
"github.com/mmcdole/gofeed"
1213
"github.com/mmcdole/gofeed/rss"
1314
"github.com/stretchr/testify/assert"
1415
)
@@ -27,7 +28,7 @@ func TestParser_Parse(t *testing.T) {
2728

2829
// Parse actual feed
2930
fp := &rss.Parser{}
30-
actual, _ := fp.Parse(bytes.NewReader(f))
31+
actual, _ := fp.Parse(bytes.NewReader(f), gofeed.NewParser().BuildRSSExtParsers())
3132

3233
// Get json encoded expected feed result
3334
ef := fmt.Sprintf("../testdata/parser/rss/%s.json", name)

0 commit comments

Comments
 (0)