From cedb0250344f8c19e7af9d56933d5f9183651c9d Mon Sep 17 00:00:00 2001 From: Samuel Williams Date: Wed, 19 Mar 2025 11:28:40 +1300 Subject: [PATCH 1/2] Fix header parsing, allowing for \t characters between VCHAR. --- lib/protocol/http1/connection.rb | 7 +- test/protocol/http1/connection/headers.rb | 106 ++++++++++++++++++++++ 2 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 test/protocol/http1/connection/headers.rb diff --git a/lib/protocol/http1/connection.rb b/lib/protocol/http1/connection.rb index feb451a..725dcad 100644 --- a/lib/protocol/http1/connection.rb +++ b/lib/protocol/http1/connection.rb @@ -37,8 +37,11 @@ module HTTP1 # HTTP/1.x header parser: FIELD_NAME = TOKEN - FIELD_VALUE = /[^\000-\037]*/.freeze - HEADER = /\A(#{FIELD_NAME}):\s*(#{FIELD_VALUE})\s*\z/.freeze + WS = /[ \t]/ # Whitespace. + OWS = /#{WS}*/ # Optional whitespace. + VCHAR = /[!-~]/ # Match visible characters from ASCII 33 to 126. + FIELD_VALUE = /(?:#{VCHAR}+(?:#{WS}+#{VCHAR})*)*/.freeze + HEADER = /\A(#{FIELD_NAME}):#{OWS}(#{FIELD_VALUE})#{OWS}\z/.freeze VALID_FIELD_NAME = /\A#{FIELD_NAME}\z/.freeze VALID_FIELD_VALUE = /\A#{FIELD_VALUE}\z/.freeze diff --git a/test/protocol/http1/connection/headers.rb b/test/protocol/http1/connection/headers.rb new file mode 100644 index 0000000..1ac8b72 --- /dev/null +++ b/test/protocol/http1/connection/headers.rb @@ -0,0 +1,106 @@ +# frozen_string_literal: true + +# Released under the MIT License. +# Copyright, 2025, by Samuel Williams. + +require "protocol/http1/connection" +require "connection_context" + +describe Protocol::HTTP1::Connection do + include_context ConnectionContext + + let(:headers) {Array.new} + + before do + client.stream.write "GET / HTTP/1.1\r\nHost: localhost\r\n#{headers.join("\r\n")}\r\n\r\n" + client.stream.close + end + + with "header that contains tab characters" do + let(:headers) {[ + "user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) \t\t\tChrome/55.0.2883.95 Safari/537.36" + ]} + + it "can parse the header" do + authority, method, target, version, headers, body = server.read_request + + expect(headers).to have_keys( + "user-agent" => be == "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) \t\t\tChrome/55.0.2883.95 Safari/537.36" + ) + end + end + + with "header that contains obsolete folding whitespace" do + let(:headers) {[ + "user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko)\n\tChrome/55.0.2883.95 Safari/537.36" + ]} + + it "rejects the request" do + expect do + server.read_request + end.to raise_exception(Protocol::HTTP1::BadHeader) + end + end + + with "header that contains invalid characters" do + let(:headers) {[ + "user-agent: Mozilla\x00Hacker Browser" + ]} + + it "rejects the request" do + expect do + server.read_request + end.to raise_exception(Protocol::HTTP1::BadHeader) + end + end + + with "header that contains invalid high characters" do + let(:headers) {[ + "user-agent: Mozilla\x7FHacker Browser" + ]} + + it "rejects the request" do + expect do + server.read_request + end.to raise_exception(Protocol::HTTP1::BadHeader) + end + end + + with "header that has empty value" do + let(:headers) {[ + "user-agent: " + ]} + + it "can parse the header" do + authority, method, target, version, headers, body = server.read_request + + expect(headers).to have_keys( + "user-agent" => be == "" + ) + end + end + + with "header that has invalid name" do + let(:headers) {[ + "invalid name: value" + ]} + + it "rejects the request" do + expect do + server.read_request + end.to raise_exception(Protocol::HTTP1::BadHeader) + end + end + + with "header that has empty name" do + let(:headers) {[ + ": value" + ]} + + it "rejects the request" do + expect do + server.read_request + end.to raise_exception(Protocol::HTTP1::BadHeader) + end + end +end From 890be0185919b9d289c305c21c7ec61e25883a3c Mon Sep 17 00:00:00 2001 From: Samuel Williams Date: Thu, 20 Mar 2025 11:54:20 +1300 Subject: [PATCH 2/2] Ensure regex are linear time. --- lib/protocol/http1/connection.rb | 6 ++--- test/protocol/http1/parser.rb | 40 ++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 test/protocol/http1/parser.rb diff --git a/lib/protocol/http1/connection.rb b/lib/protocol/http1/connection.rb index 725dcad..c2e5b26 100644 --- a/lib/protocol/http1/connection.rb +++ b/lib/protocol/http1/connection.rb @@ -40,8 +40,8 @@ module HTTP1 WS = /[ \t]/ # Whitespace. OWS = /#{WS}*/ # Optional whitespace. VCHAR = /[!-~]/ # Match visible characters from ASCII 33 to 126. - FIELD_VALUE = /(?:#{VCHAR}+(?:#{WS}+#{VCHAR})*)*/.freeze - HEADER = /\A(#{FIELD_NAME}):#{OWS}(#{FIELD_VALUE})#{OWS}\z/.freeze + FIELD_VALUE = /#{VCHAR}+(?:#{WS}+#{VCHAR}+)*/.freeze + HEADER = /\A(#{FIELD_NAME}):#{OWS}(?:(#{FIELD_VALUE})#{OWS})?\z/.freeze VALID_FIELD_NAME = /\A#{FIELD_NAME}\z/.freeze VALID_FIELD_VALUE = /\A#{FIELD_VALUE}\z/.freeze @@ -487,7 +487,7 @@ def read_headers break if line.empty? if match = line.match(HEADER) - fields << [match[1], match[2]] + fields << [match[1], match[2] || ""] else raise BadHeader, "Could not parse header: #{line.inspect}" end diff --git a/test/protocol/http1/parser.rb b/test/protocol/http1/parser.rb new file mode 100644 index 0000000..5bcecde --- /dev/null +++ b/test/protocol/http1/parser.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +# Released under the MIT License. +# Copyright, 2025, by Samuel Williams. + +require "protocol/http1/connection" + +describe Protocol::HTTP1 do + describe "REQUEST_LINE" do + it "parses in linear time" do + skip_unless_method_defined(:linear_time?, Regexp.singleton_class) + + expect(Regexp).to be(:linear_time?, Protocol::HTTP1::REQUEST_LINE) + end + end + + describe "HEADER" do + it "parses in linear time" do + skip_unless_method_defined(:linear_time?, Regexp.singleton_class) + + expect(Regexp).to be(:linear_time?, Protocol::HTTP1::HEADER) + end + end + + describe "VALID_FIELD_NAME" do + it "parses in linear time" do + skip_unless_method_defined(:linear_time?, Regexp.singleton_class) + + expect(Regexp).to be(:linear_time?, Protocol::HTTP1::VALID_FIELD_NAME) + end + end + + describe "VALID_FIELD_VALUE" do + it "parses in linear time" do + skip_unless_method_defined(:linear_time?, Regexp.singleton_class) + + expect(Regexp).to be(:linear_time?, Protocol::HTTP1::VALID_FIELD_VALUE) + end + end +end