Skip to content

Commit 5251f76

Browse files
committed
#151: Allow additional parsers for feed formats. Currently only atom is allowed as part of RSS
1 parent 0fb9c69 commit 5251f76

File tree

6 files changed

+167
-101
lines changed

6 files changed

+167
-101
lines changed

atom/parser.go

Lines changed: 112 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ var (
2121
"uri": true,
2222
"url": true, // atom 0.3
2323
}
24+
25+
// No known explicit extension parsers for Atom, currently
26+
emptyExtParsers = make(shared.ExtParsers)
2427
)
2528

2629
// Parser is an Atom Parser
@@ -38,6 +41,14 @@ func (ap *Parser) Parse(feed io.Reader) (*Feed, error) {
3841
return ap.parseRoot(p)
3942
}
4043

44+
func (ap *Parser) ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error) {
45+
entry := &Entry{}
46+
if err := ap.parseEntryContent(p, entry); err != nil {
47+
return nil, err
48+
}
49+
return entry, nil
50+
}
51+
4152
func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
4253
if err := p.Expect(xpp.StartTag, "feed"); err != nil {
4354
return nil, err
@@ -69,7 +80,7 @@ func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
6980
name := strings.ToLower(p.Name)
7081

7182
if shared.IsExtension(p) {
72-
e, err := shared.ParseExtension(extensions, p)
83+
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
7384
if err != nil {
7485
return nil, err
7586
}
@@ -215,103 +226,14 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
215226
}
216227

217228
if tok == xpp.StartTag {
218-
219-
name := strings.ToLower(p.Name)
220-
221229
if shared.IsExtension(p) {
222-
e, err := shared.ParseExtension(extensions, p)
230+
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
223231
if err != nil {
224232
return nil, err
225233
}
226234
extensions = e
227-
} else if name == "title" {
228-
result, err := ap.parseAtomText(p)
229-
if err != nil {
230-
return nil, err
231-
}
232-
entry.Title = result
233-
} else if name == "id" {
234-
result, err := ap.parseAtomText(p)
235-
if err != nil {
236-
return nil, err
237-
}
238-
entry.ID = result
239-
} else if name == "rights" ||
240-
name == "copyright" {
241-
result, err := ap.parseAtomText(p)
242-
if err != nil {
243-
return nil, err
244-
}
245-
entry.Rights = result
246-
} else if name == "summary" {
247-
result, err := ap.parseAtomText(p)
248-
if err != nil {
249-
return nil, err
250-
}
251-
entry.Summary = result
252-
} else if name == "source" {
253-
result, err := ap.parseSource(p)
254-
if err != nil {
255-
return nil, err
256-
}
257-
entry.Source = result
258-
} else if name == "updated" ||
259-
name == "modified" {
260-
result, err := ap.parseAtomText(p)
261-
if err != nil {
262-
return nil, err
263-
}
264-
entry.Updated = result
265-
date, err := shared.ParseDate(result)
266-
if err == nil {
267-
utcDate := date.UTC()
268-
entry.UpdatedParsed = &utcDate
269-
}
270-
} else if name == "contributor" {
271-
result, err := ap.parsePerson("contributor", p)
272-
if err != nil {
273-
return nil, err
274-
}
275-
entry.Contributors = append(entry.Contributors, result)
276-
} else if name == "author" {
277-
result, err := ap.parsePerson("author", p)
278-
if err != nil {
279-
return nil, err
280-
}
281-
entry.Authors = append(entry.Authors, result)
282-
} else if name == "category" {
283-
result, err := ap.parseCategory(p)
284-
if err != nil {
285-
return nil, err
286-
}
287-
entry.Categories = append(entry.Categories, result)
288-
} else if name == "link" {
289-
result, err := ap.parseLink(p)
290-
if err != nil {
291-
return nil, err
292-
}
293-
entry.Links = append(entry.Links, result)
294-
} else if name == "published" ||
295-
name == "issued" {
296-
result, err := ap.parseAtomText(p)
297-
if err != nil {
298-
return nil, err
299-
}
300-
entry.Published = result
301-
date, err := shared.ParseDate(result)
302-
if err == nil {
303-
utcDate := date.UTC()
304-
entry.PublishedParsed = &utcDate
305-
}
306-
} else if name == "content" {
307-
result, err := ap.parseContent(p)
308-
if err != nil {
309-
return nil, err
310-
}
311-
entry.Content = result
312235
} else {
313-
err := p.Skip()
314-
if err != nil {
236+
if err := ap.parseEntryContent(p, entry); err != nil {
315237
return nil, err
316238
}
317239
}
@@ -329,6 +251,103 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
329251
return entry, nil
330252
}
331253

254+
func (ap *Parser) parseEntryContent(p *xpp.XMLPullParser, entry *Entry) error {
255+
name := strings.ToLower(p.Name)
256+
257+
if name == "title" {
258+
result, err := ap.parseAtomText(p)
259+
if err != nil {
260+
return err
261+
}
262+
entry.Title = result
263+
} else if name == "id" {
264+
result, err := ap.parseAtomText(p)
265+
if err != nil {
266+
return err
267+
}
268+
entry.ID = result
269+
} else if name == "rights" ||
270+
name == "copyright" {
271+
result, err := ap.parseAtomText(p)
272+
if err != nil {
273+
return err
274+
}
275+
entry.Rights = result
276+
} else if name == "summary" {
277+
result, err := ap.parseAtomText(p)
278+
if err != nil {
279+
return err
280+
}
281+
entry.Summary = result
282+
} else if name == "source" {
283+
result, err := ap.parseSource(p)
284+
if err != nil {
285+
return err
286+
}
287+
entry.Source = result
288+
} else if name == "updated" ||
289+
name == "modified" {
290+
result, err := ap.parseAtomText(p)
291+
if err != nil {
292+
return err
293+
}
294+
entry.Updated = result
295+
date, err := shared.ParseDate(result)
296+
if err == nil {
297+
utcDate := date.UTC()
298+
entry.UpdatedParsed = &utcDate
299+
}
300+
} else if name == "contributor" {
301+
result, err := ap.parsePerson("contributor", p)
302+
if err != nil {
303+
return err
304+
}
305+
entry.Contributors = append(entry.Contributors, result)
306+
} else if name == "author" {
307+
result, err := ap.parsePerson("author", p)
308+
if err != nil {
309+
return err
310+
}
311+
entry.Authors = append(entry.Authors, result)
312+
} else if name == "category" {
313+
result, err := ap.parseCategory(p)
314+
if err != nil {
315+
return err
316+
}
317+
entry.Categories = append(entry.Categories, result)
318+
} else if name == "link" {
319+
result, err := ap.parseLink(p)
320+
if err != nil {
321+
return err
322+
}
323+
entry.Links = append(entry.Links, result)
324+
} else if name == "published" ||
325+
name == "issued" {
326+
result, err := ap.parseAtomText(p)
327+
if err != nil {
328+
return err
329+
}
330+
entry.Published = result
331+
date, err := shared.ParseDate(result)
332+
if err == nil {
333+
utcDate := date.UTC()
334+
entry.PublishedParsed = &utcDate
335+
}
336+
} else if name == "content" {
337+
result, err := ap.parseContent(p)
338+
if err != nil {
339+
return err
340+
}
341+
entry.Content = result
342+
} else {
343+
err := p.Skip()
344+
if err != nil {
345+
return err
346+
}
347+
}
348+
return nil
349+
}
350+
332351
func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {
333352

334353
if err := p.Expect(xpp.StartTag, "source"); err != nil {
@@ -358,7 +377,7 @@ func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {
358377
name := strings.ToLower(p.Name)
359378

360379
if shared.IsExtension(p) {
361-
e, err := shared.ParseExtension(extensions, p)
380+
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
362381
if err != nil {
363382
return nil, err
364383
}

extensions/extensions.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ type Extension struct {
1212
Value string `json:"value"`
1313
Attrs map[string]string `json:"attrs"`
1414
Children map[string][]Extension `json:"children"`
15+
Parsed interface{} `json:"parsed,omitempty"`
1516
}
1617

1718
func parseTextExtension(name string, extensions map[string][]Extension) (value string) {

internal/shared/extparser.go

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ import (
77
"github.com/mmcdole/goxpp"
88
)
99

10+
type ExtParser interface {
11+
ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error)
12+
}
13+
14+
type ExtParsers map[string]ExtParser
15+
1016
// IsExtension returns whether or not the current
1117
// XML element is an extension element (if it has a
1218
// non empty prefix)
@@ -22,10 +28,16 @@ func IsExtension(p *xpp.XMLPullParser) bool {
2228
// ParseExtension parses the current element of the
2329
// XMLPullParser as an extension element and updates
2430
// the extension map
25-
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) {
31+
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser, extParsers ExtParsers) (ext.Extensions, error) {
2632
prefix := prefixForNamespace(p.Space, p)
2733

28-
result, err := parseExtensionElement(p)
34+
var result ext.Extension
35+
var err error
36+
if extParser, ok := extParsers[prefix]; ok {
37+
result, err = parseExtensionFromParser(p, extParser)
38+
} else {
39+
result, err = parseExtensionElement(p)
40+
}
2941
if err != nil {
3042
return nil, err
3143
}
@@ -43,6 +55,23 @@ func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, er
4355
return fe, nil
4456
}
4557

58+
func parseExtensionFromParser(p *xpp.XMLPullParser, extParser ExtParser) (e ext.Extension, err error) {
59+
if err = p.Expect(xpp.StartTag, "*"); err != nil {
60+
return e, err
61+
}
62+
63+
e.Name = p.Name
64+
if e.Parsed, err = extParser.ParseAsExtension(p); err != nil {
65+
return e, err
66+
}
67+
68+
if err = p.Expect(xpp.EndTag, e.Name); err != nil {
69+
return e, err
70+
}
71+
72+
return e, nil
73+
}
74+
4675
func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) {
4776
if err = p.Expect(xpp.StartTag, "*"); err != nil {
4877
return e, err

parser.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
"github.com/mmcdole/gofeed/atom"
1313
"github.com/mmcdole/gofeed/json"
14+
"github.com/mmcdole/gofeed/internal/shared"
1415
"github.com/mmcdole/gofeed/rss"
1516
)
1617

@@ -155,8 +156,19 @@ func (f *Parser) parseAtomFeed(feed io.Reader) (*Feed, error) {
155156
return f.atomTrans().Translate(af)
156157
}
157158

159+
func (f *Parser) BuildRSSExtParsers() shared.ExtParsers {
160+
extParsers := make(shared.ExtParsers, 3)
161+
162+
// all possible atom variants
163+
extParsers["atom"] = f.ap
164+
extParsers["atom10"] = f.ap
165+
extParsers["atom03"] = f.ap
166+
167+
return extParsers
168+
}
169+
158170
func (f *Parser) parseRSSFeed(feed io.Reader) (*Feed, error) {
159-
rf, err := f.rp.Parse(feed)
171+
rf, err := f.rp.Parse(feed, f.BuildRSSExtParsers())
160172
if err != nil {
161173
return nil, err
162174
}

rss/parser.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,14 @@ import (
1111
)
1212

1313
// Parser is a RSS Parser
14-
type Parser struct{}
14+
type Parser struct {
15+
extParsers shared.ExtParsers
16+
}
1517

1618
// Parse parses an xml feed into an rss.Feed
17-
func (rp *Parser) Parse(feed io.Reader) (*Feed, error) {
19+
func (rp *Parser) Parse(feed io.Reader, extParsers shared.ExtParsers) (*Feed, error) {
1820
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)
21+
rp.extParsers = extParsers
1922

2023
_, err := shared.FindRoot(p)
2124
if err != nil {
@@ -141,7 +144,8 @@ func (rp *Parser) parseChannel(p *xpp.XMLPullParser) (rss *Feed, err error) {
141144
name := strings.ToLower(p.Name)
142145

143146
if shared.IsExtension(p) {
144-
ext, err := shared.ParseExtension(extensions, p)
147+
148+
ext, err := shared.ParseExtension(extensions, p, rp.extParsers)
145149
if err != nil {
146150
return nil, err
147151
}
@@ -338,7 +342,7 @@ func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) {
338342
name := strings.ToLower(p.Name)
339343

340344
if shared.IsExtension(p) {
341-
ext, err := shared.ParseExtension(extensions, p)
345+
ext, err := shared.ParseExtension(extensions, p, rp.extParsers)
342346
if err != nil {
343347
return nil, err
344348
}

rss/parser_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"strings"
1010
"testing"
1111

12+
"github.com/mmcdole/gofeed"
1213
"github.com/mmcdole/gofeed/rss"
1314
"github.com/stretchr/testify/assert"
1415
)
@@ -27,7 +28,7 @@ func TestParser_Parse(t *testing.T) {
2728

2829
// Parse actual feed
2930
fp := &rss.Parser{}
30-
actual, _ := fp.Parse(bytes.NewReader(f))
31+
actual, _ := fp.Parse(bytes.NewReader(f), gofeed.NewParser().BuildRSSExtParsers())
3132

3233
// Get json encoded expected feed result
3334
ef := fmt.Sprintf("../testdata/parser/rss/%s.json", name)

0 commit comments

Comments
 (0)