diff --git a/app/lib/link_checker/uri_checker/http_checker.rb b/app/lib/link_checker/uri_checker/http_checker.rb index 94a6cb92..c8142722 100644 --- a/app/lib/link_checker/uri_checker/http_checker.rb +++ b/app/lib/link_checker/uri_checker/http_checker.rb @@ -89,7 +89,7 @@ def initialize(options = {}) end end - class FaradayError < Error + class HttpCommunicationError < Error def initialize(options = {}) super(suggested_fix: :determine_if_temporary, **options) end @@ -238,7 +238,7 @@ def make_request(method, check_ssl: true) response rescue Faraday::ConnectionFailed add_problem( - FaradayError.new( + HttpCommunicationError.new( summary: :website_unavailable, message: :website_host_offline, from_redirect: from_redirect?, @@ -247,7 +247,7 @@ def make_request(method, check_ssl: true) nil rescue Faraday::TimeoutError add_problem( - FaradayError.new( + HttpCommunicationError.new( summary: :website_unavailable, message: :page_is_not_responding, from_redirect: from_redirect?, @@ -269,9 +269,26 @@ def make_request(method, check_ssl: true) make_request(method, check_ssl: false) rescue Faraday::Error add_problem( - FaradayError.new( + HttpCommunicationError.new( summary: :page_unavailable, - message: :page_failing_to_load, + message: :technical_error_on_page, + from_redirect: from_redirect?, + ), + ) + nil + # Ruby net-http cannot handle responses with headers with CR/LF characters in, and such responses raise + # an argument error. We want to catch these and add to the report for now, rather than continuing to raise these + # as errors, which can trigger alerting. We are planning on raising this to be fixed at the net-http level which may + # mean we do not have to handle these here. + # Doing some fuzzy matching on the error message to try and ensure a bit of resilience, allowing for changes to the + # exact error message in the exception. + rescue ArgumentError => e + raise e unless e.message =~ /(.*)header(.*) CR\/LF/ + + add_problem( + HttpCommunicationError.new( + summary: :page_unavailable, + message: :technical_error_on_page, from_redirect: from_redirect?, ), ) diff --git a/app/lib/link_checker/uri_checker/problem.rb b/app/lib/link_checker/uri_checker/problem.rb index 77018dba..354bff2d 100644 --- a/app/lib/link_checker/uri_checker/problem.rb +++ b/app/lib/link_checker/uri_checker/problem.rb @@ -45,7 +45,7 @@ def get_string(symbol) UnusualUrl NotAvailableOnline NoHost - FaradayError + HttpCommunicationError PageNotFound PageRequiresLogin PageIsUnavailable diff --git a/config/locales/en.yml b/config/locales/en.yml index f86a57c4..ba585da2 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -103,9 +103,9 @@ en: singular: This page has a security problem that users may be alerted to. redirect: This redirects to a page with a security problem. - page_failing_to_load: - singular: This page is failing to load. - redirect: This redirects to a page that isn't loading. + technical_error_on_page: + singular: This page has a technical error. + redirect: This redirects to a page that has a technical error. determine_if_temporary: Determine if this is a temporary issue or the resource is no longer available. diff --git a/spec/lib/link_checker_spec.rb b/spec/lib/link_checker_spec.rb index c9d625ab..7f6ba01e 100644 --- a/spec/lib/link_checker_spec.rb +++ b/spec/lib/link_checker_spec.rb @@ -142,6 +142,26 @@ include_examples "has warnings" end + context "header with CR/LF character" do + let(:uri) { "http://www.not-gov.uk/header_with_CRLF_character" } + before do + stub_request(:get, uri) + .to_return(headers: { "Invalid" => "A header containing a carriage return \r character" }) + end + + include_examples "has errors" + include_examples "has a problem summary", "Page unavailable" + end + + it "does not recue from other argument error" do + uri = "http://www.not-gov.uk/raises_argument_error" + error = ArgumentError.new("something that's nothing to do with headers and carriage return line feed chars") + + stub_request(:get, uri).to_raise(error) + + expect { described_class.new(uri).call }.to raise_error(error) + end + context "slow response" do let(:uri) { "http://www.not-gov.uk/slow_response" }