Skip to content

Commit 0978a85

Browse files
committed
feat: integrate request_service and use ssrf_filter strategy by default
Signed-off-by: Gil Desmarais <[email protected]>
1 parent 4d79de0 commit 0978a85

File tree

8 files changed

+123
-68
lines changed

8 files changed

+123
-68
lines changed

app.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
require 'rack/cache'
55
require_relative 'roda/roda_plugins/basic_auth'
66

7+
require 'html2rss'
8+
require_relative 'app/ssrf_filter_strategy'
9+
710
module Html2rss
811
module Web
912
##
@@ -13,6 +16,10 @@ module Web
1316
class App < Roda
1417
CONTENT_TYPE_RSS = 'application/xml'
1518

19+
Html2rss::RequestService.register_strategy(:ssrf_filter, SsrfFilterStrategy)
20+
Html2rss::RequestService.default_strategy_name = :ssrf_filter
21+
Html2rss::RequestService.unregister_strategy(:faraday)
22+
1623
def self.development? = ENV['RACK_ENV'] == 'development'
1724

1825
opts[:check_dynamic_arity] = false
@@ -64,6 +71,8 @@ def self.development? = ENV['RACK_ENV'] == 'development'
6471
end
6572
end
6673

74+
@show_backtrace = !ENV['CI'].to_s.empty? || development?
75+
6776
route do |r|
6877
r.public
6978
r.hash_branches('')

app/ssrf_filter_strategy.rb

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# frozen_string_literal: true
2+
3+
require 'ssrf_filter'
4+
require 'html2rss'
5+
require_relative '../app/local_config'
6+
7+
module Html2rss
  module Web
    ##
    # Request strategy which performs the HTTP GET via SsrfFilter.
    #
    # The request headers are the globally configured headers
    # (LocalConfig.global[:headers], defaulting to none) merged with the
    # headers of the request context; on conflict the context's headers win.
    class SsrfFilterStrategy < Html2rss::RequestService::Strategy
      ##
      # Fetches ctx.url and wraps the result in a RequestService response.
      #
      # @return [Html2rss::RequestService::Response]
      #   the response body and, per header name, the first header value
      def execute
        global_headers = LocalConfig.global.fetch(:headers, {})
        headers = global_headers.merge(ctx.headers.transform_keys(&:to_sym))

        http_response = SsrfFilter.get(ctx.url, headers:)

        body = http_response.body
        # `to_hash` yields an Array of values per header; keep only the first one.
        first_header_values = http_response.to_hash.transform_values(&:first)

        Html2rss::RequestService::Response.new(body:, headers: first_header_values)
      end
    end
  end
end

helpers/auto_source.rb

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
require 'addressable'
44
require 'base64'
55
require 'html2rss'
6-
require 'ssrf_filter'
76

87
module Html2rss
98
module Web
@@ -20,20 +19,6 @@ def self.allowed_origins = ENV.fetch('AUTO_SOURCE_ALLOWED_ORIGINS', '')
2019
.reject(&:empty?)
2120
.to_set
2221

23-
# @param encoded_url [String] Base64 encoded URL
24-
# @return [RSS::Rss]
25-
def self.build_auto_source_from_encoded_url(encoded_url)
26-
url = Addressable::URI.parse Base64.urlsafe_decode64(encoded_url)
27-
request = SsrfFilter.get(url, headers: LocalConfig.global.fetch(:headers, {}))
28-
headers = request.to_hash.transform_values(&:first)
29-
30-
auto_source = Html2rss::AutoSource.new(url, body: request.body, headers:)
31-
32-
auto_source.channel.stylesheets << Html2rss::RssBuilder::Stylesheet.new(href: '/rss.xsl', type: 'text/xsl')
33-
34-
auto_source.build
35-
end
36-
3722
# @param rss [RSS::Rss]
3823
# @param default_in_minutes [Integer]
3924
# @return [Integer]

routes/auto_source.rb

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22

33
require_relative '../app/http_cache'
44
require_relative '../helpers/auto_source'
5+
require 'html2rss'
56

67
module Html2rss
78
module Web
89
class App
10+
# rubocop:disable Metrics/BlockLength
911
hash_branch 'auto_source' do |r|
1012
with_basic_auth(realm: 'Auto Source',
1113
username: AutoSource.username,
@@ -18,15 +20,29 @@ class App
1820
end
1921

2022
# GET /auto_source/:encoded_url
#
# Decodes the URL-safe Base64 encoded URL, builds the feed with the request
# strategy named by the optional `strategy` query parameter and responds
# with the RSS XML, including an XML stylesheet reference.
#
# Raises Html2rss::RequestService::UnknownStrategy when the requested
# strategy is not registered.
r.on String, method: :get do |encoded_url|
  # Fall back to :ssrf_filter when the parameter is missing or blank.
  # `value.to_sym || default` can never pick the default: `nil.to_sym`
  # raises NoMethodError and every Symbol (even :"") is truthy.
  strategy_param = request.params['strategy'].to_s
  strategy = strategy_param.empty? ? :ssrf_filter : strategy_param.to_sym

  unless Html2rss::RequestService.strategy_registered?(strategy)
    raise Html2rss::RequestService::UnknownStrategy
  end

  response['Content-Type'] = CONTENT_TYPE_RSS

  url = Addressable::URI.parse Base64.urlsafe_decode64(encoded_url)
  rss = Html2rss.auto_source(url, strategy:)

  # Unfortunately, Ruby's rss gem does not provide a direct method to
  # add an XML stylesheet to the RSS::RSS object itself.
  stylesheet = Html2rss::RssBuilder::Stylesheet.new(href: '/rss.xsl', type: 'text/xsl').to_xml

  xml_content = rss.to_xml
  # Insert the stylesheet processing instruction right after the XML
  # declaration. `sub!` returns nil when nothing matched, so its return
  # value is deliberately not used.
  xml_content.sub!(/^<\?xml version="1.0" encoding="UTF-8"\?>/,
                   "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n#{stylesheet}")

  HttpCache.expires response,
                    AutoSource.ttl_in_seconds(rss),
                    cache_control: 'private, must-revalidate'

  xml_content
end
3147
else
3248
# auto_source feature is disabled
@@ -37,6 +53,7 @@ class App
3753
end
3854
end
3955
end
56+
# rubocop:enable Metrics/BlockLength
4057
end
4158
end
4259
end

spec/fixtures/vcr_cassettes/auto_source-github-h2r-web.yml

Lines changed: 16 additions & 15 deletions
Large diffs are not rendered by default.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# frozen_string_literal: true
2+
3+
require 'spec_helper'
4+
require_relative '../../../../app/ssrf_filter_strategy'
5+
6+
RSpec.describe Html2rss::Web::SsrfFilterStrategy do
  subject(:strategy) { described_class.new(ctx) }

  # Request context double exposing the URL and headers read by the strategy.
  let(:ctx) { instance_double(Html2rss::RequestService::Context, url:, headers:) }
  let(:url) { 'http://example.com' }
  let(:headers) { { 'User-Agent': 'Mozilla/5.0' } }

  # Stand-in for the Net::HTTPResponse returned by SsrfFilter.get.
  let(:http_response) do
    instance_double(Net::HTTPResponse, body: 'body', to_hash: { 'Content-Type' => ['text/html'] })
  end

  describe '#execute' do
    before do
      allow(SsrfFilter).to receive(:get).with(url, headers:).and_return(http_response)
    end

    it 'returns a response', :aggregate_failures do
      result = strategy.execute

      expect(SsrfFilter).to have_received(:get).with(url, headers:)
      expect(result).to be_a(Html2rss::RequestService::Response)
      expect(result.body).to eq('body')
      expect(result.headers).to eq('Content-Type' => 'text/html')
    end
  end
end

spec/html2rss/web/helpers/auto_source_spec.rb

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -149,31 +149,4 @@
149149
end
150150
end
151151
# rubocop:enable RSpec/NamedSubject, RSpec/MessageSpies
152-
153-
describe '.build_auto_source_from_encoded_url' do
154-
subject(:feed) do
155-
VCR.use_cassette('auto_source-github-h2r-web', match_requests_on: %i[method path]) do
156-
described_class.build_auto_source_from_encoded_url(encoded_url)
157-
end
158-
end
159-
160-
before do
161-
allow(SsrfFilter).to receive(:get).with(any_args).and_call_original
162-
end
163-
164-
let(:url) { 'https://github.com/html2rss/html2rss-web/commits/master' }
165-
let(:encoded_url) { Base64.urlsafe_encode64(url) }
166-
167-
it 'returns an RSS::Rss object' do
168-
expect(feed).to be_a(RSS::Rss)
169-
end
170-
171-
it 'sets headers in the http request' do
172-
feed
173-
expect(SsrfFilter).to have_received(:get).with(Addressable::URI.parse(url),
174-
headers: {
175-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36' # rubocop:disable Layout/LineLength
176-
})
177-
end
178-
end
179152
end

spec/routes/auto_source_spec.rb

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
require 'spec_helper'
44
require 'rss'
55
require_relative '../../app'
6+
require 'html2rss'
67

78
RSpec.describe Html2rss::Web::App do # rubocop:disable RSpec/SpecFilePathFormat
89
include Rack::Test::Methods
@@ -11,6 +12,7 @@ def app = described_class
1112
let(:request_headers) do
1213
{ 'HTTP_HOST' => 'localhost' }
1314
end
15+
let(:encoded_url) { Base64.urlsafe_encode64('https://github.com/html2rss/html2rss-web/commits/master') }
1416

1517
let(:username) { 'username' }
1618
let(:password) { 'password' }
@@ -27,8 +29,7 @@ def app = described_class
2729
allow(Html2rss::Web::AutoSource).to receive_messages(enabled?: true,
2830
username:,
2931
password:,
30-
allowed_origins: Set['localhost'],
31-
build_auto_source_from_encoded_url: feed)
32+
allowed_origins: Set['localhost'])
3233
end
3334

3435
describe "GET '/auto_source/'" do
@@ -66,18 +67,35 @@ def app = described_class
6667
describe "GET '/auto_source/:encoded_url'" do
  # Request headers including the basic auth credentials.
  let(:authorized_headers) do
    request_headers.merge('HTTP_AUTHORIZATION' => basic_authorize(username, password))
  end

  context 'with provided basic auth' do
    subject(:response) do
      VCR.use_cassette('auto_source-github-h2r-web') do
        # `strategy` is sent without a value on purpose.
        get "/auto_source/#{encoded_url}?strategy", {}, authorized_headers
      end
    end

    it 'responds successfully', :aggregate_failures do
      expect(response).to be_ok
      expect(response.body).to start_with '<?xml version="1.0" encoding="UTF-8"?>'
      expect(response.get_header('cache-control')).to eq 'must-revalidate, private, max-age=0'
      expect(response.get_header('content-type')).to eq described_class::CONTENT_TYPE_RSS
    end
  end

  context 'when strategy is not registered' do
    subject(:response) do
      VCR.use_cassette('auto_source-github-h2r-web', match_requests_on: [:path]) do
        get "/auto_source/#{encoded_url}?strategy=nope", {}, authorized_headers
      end
    end

    it 'responds with Error', :aggregate_failures do
      expect(response.status).to eq 422
      expect(response.body).to match(/UnknownStrategy/)
    end
  end
end
82100

83101
context 'when auto_source feature is disabled' do
@@ -96,7 +114,7 @@ def app = described_class
96114

97115
describe "GET '/auto_source/:encoded_url'" do
98116
it 'responds with 400 Bad Request', :aggregate_failures do
99-
get "/auto_source/#{Base64.urlsafe_encode64('https://github.com/html2rss/html2rss-web')}",
117+
get "/auto_source/#{encoded_url}",
100118
{},
101119
request_headers.merge('HTTP_AUTHORIZATION' => basic_authorize(username, password))
102120

0 commit comments

Comments
 (0)