Skip to content

Commit f9b6653

Browse files
Improve RSS importer with canonical_link and extract_tags option (#489)
Merge pull request 489
1 parent 098b02d commit f9b6653

File tree

2 files changed

+34
-8
lines changed

2 files changed

+34
-8
lines changed

docs/_importers/rss.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,10 @@ $ ruby -r rubygems -e 'require "jekyll-import";
1717
{% endhighlight %}
1818

1919
The `source` field is required and can be either a local file or a remote one.
20+
Other optional fields are as follows:
21+
* `canonical_link` – copy the original link to the post as `canonical_url` (default: `false`)
22+
* `render_audio` – render `<audio>` element in posts for the enclosure URLs (default: `false`)
23+
* `tag` – add a specific tag to all posts
24+
* `extract_tags` – copies tags from the given subfield on the RSS `<item>`
25+
26+
__Note:__ `tag` and `extract_tags` are mutually exclusive options; they cannot be provided together.

lib/jekyll-import/importers/rss.rb

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,15 @@ module Importers
55
class RSS < Importer
66
def self.specify_options(c)
77
c.option "source", "--source NAME", "The RSS file or URL to import"
8-
c.option "tag", "--tag NAME", "Add a tag to posts"
9-
c.option "render_audio", "--render_audio", "Render <audio> element as necessary"
8+
c.option "tag", "--tag NAME", "Add a specific tag to all posts"
9+
c.option "extract_tags", "--extract_tags KEY", "Copies tags from the given subfield on the RSS <item>"
10+
c.option "render_audio", "--render_audio", "Render <audio> element in posts for the enclosure URLs (default: false)"
11+
c.option "canonical_link", "--canonical_link", "Copy original link as canonical_url to post. (default: false)"
1012
end
1113

1214
# Aborts the import when the supplied options are unusable.
#
# options - the Hash of importer options.
#
# Exits via `abort` when "source" is missing, or when both a non-empty "tag"
# and "extract_tags" are supplied, since the two options are mutually exclusive.
def self.validate(options)
  abort "Missing mandatory option --source." if options["source"].nil?

  # An empty --tag counts as "not given", matching how get_tags ignores an empty tag.
  tag_given = !options["tag"].to_s.empty?
  abort "Provide either --tag or --extract_tags option, not both." if options["extract_tags"] && tag_given
end
1518

1619
def self.require_deps
@@ -33,7 +36,7 @@ def self.process(options)
3336
source = options.fetch("source")
3437

3538
content = ""
36-
open(source) { |s| content = s.read }
39+
URI.open(source) { |s| content = s.read }
3740
rss = ::RSS::Parser.parse(content, false)
3841

3942
raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss
@@ -52,13 +55,14 @@ def self.write_rss_item(item, options)
5255
post_name = Jekyll::Utils.slugify(item.title, :mode => "latin")
5356
name = "#{formatted_date}-#{post_name}"
5457
audio = render_audio && item.enclosure.url
58+
canonical_link = options.fetch("canonical_link", false)
5559

5660
header = {
57-
"layout" => "post",
58-
"title" => item.title,
59-
}
60-
61-
header["tag"] = options["tag"] unless options["tag"].nil? || options["tag"].empty?
61+
"layout" => "post",
62+
"title" => item.title,
63+
"canonical_url" => (canonical_link ? item.link : nil),
64+
"tag" => get_tags(item, options),
65+
}.compact
6266

6367
frontmatter.each do |value|
6468
header[value] = item.send(value)
@@ -91,6 +95,21 @@ def self.write_rss_item(item, options)
9195
f.puts output
9296
end
9397
end
98+
99+
# Resolves the value written to a post's "tag" front matter key.
#
# An explicit, non-empty `options["tag"]` wins. Otherwise, when
# `options["extract_tags"]` names a field, the Array held in the item's
# instance variable of that name is read, and each entry's `content` is
# downcased and de-duplicated.
#
# item    - the RSS item object being imported.
# options - the Hash of importer options ("tag", "extract_tags").
#
# Returns the explicit tag, an Array of unique lowercase tag Strings, or nil
# when there is nothing to add.
def self.get_tags(item, options)
  explicit_tag = options["tag"]
  return explicit_tag unless explicit_tag.nil? || explicit_tag.empty?

  tags_reference = options["extract_tags"].to_s
  # Only a plain identifier is a legal instance variable name; an empty key or
  # one like "dc:subject" would make instance_variable_get raise NameError.
  return unless tags_reference.match?(/\A[A-Za-z_]\w*\z/)

  tags_from_feed = item.instance_variable_get("@#{tags_reference}")
  return unless tags_from_feed.is_a?(Array)

  # Entries without text content are skipped instead of crashing on nil.
  tags = tags_from_feed.map { |feed_tag| feed_tag.content }.compact.map(&:downcase).uniq
  tags.empty? ? nil : tags
end
112+
private_class_method :get_tags
94113
end
95114
end
96115
end

0 commit comments

Comments
 (0)