From e1d33fea6ddcfe69de4c31be04908525eddf238a Mon Sep 17 00:00:00 2001 From: Bryan Call Date: Fri, 19 Dec 2025 13:45:58 -0800 Subject: [PATCH 1/2] Fix: HTTPHdr host cache invalidation when Host header modified When a plugin or internal code modifies the Host header via MIME layer functions (e.g., mime_field_value_set), the HTTPHdr cached host info becomes stale. The cached m_host_length doesn't match the new value, causing host_get() to return incorrect data with garbage characters appended (e.g., SNI warnings showing 'mysterio.yahoo.com^R'). Root cause: HTTPHdr caches host info in _fill_target_cache() but the MIME layer modifications bypass HTTPHdr, leaving m_target_cached true with stale m_host_length. Fix: Detect staleness by caching both the host value pointer and raw length. On access, compare current MIMEField values against cached values. If either differs, the value was modified and we refill the cache. This approach: - Keeps cache logic entirely within HTTPHdr (clean separation) - Doesn't modify MIMEHdrImpl (safe for disk cache format) - Detects all modification paths including plugin API calls Added comprehensive unit tests covering host header modifications with various scenarios including port handling. --- include/proxy/hdrs/HTTP.h | 27 +- src/proxy/hdrs/HTTP.cc | 7 +- src/proxy/hdrs/unit_tests/test_Hdrs.cc | 364 +++++++++++++++++++++++++ 3 files changed, 387 insertions(+), 11 deletions(-) diff --git a/include/proxy/hdrs/HTTP.h b/include/proxy/hdrs/HTTP.h index 8759fcc09ab..c7c0f65a618 100644 --- a/include/proxy/hdrs/HTTP.h +++ b/include/proxy/hdrs/HTTP.h @@ -447,15 +447,17 @@ class IOBufferReader; class HTTPHdr : public MIMEHdr { public: - HTTPHdrImpl *m_http = nullptr; - mutable URL m_url_cached; - mutable MIMEField *m_host_mime = nullptr; - mutable int m_host_length = 0; ///< Length of hostname. - mutable int m_port = 0; ///< Target port. - mutable bool m_target_cached = false; ///< Whether host name and port are cached. - mutable bool m_target_in_url = false; ///< Whether host name and port are in the URL. - mutable bool m_100_continue_sent = false; ///< Whether ATS sent a 100 Continue optimized response. - mutable bool m_100_continue_required = false; ///< Whether 100_continue is in the Expect header. + HTTPHdrImpl *m_http = nullptr; + mutable URL m_url_cached; + mutable MIMEField *m_host_mime = nullptr; + mutable const char *m_host_ptr = nullptr; ///< Cached host value pointer for staleness detection. + mutable int m_host_value_len = 0; ///< Cached raw Host header value length for staleness detection. + mutable int m_host_length = 0; ///< Length of hostname (parsed, excludes port). + mutable int m_port = 0; ///< Target port. + mutable bool m_target_cached = false; ///< Whether host name and port are cached. + mutable bool m_target_in_url = false; ///< Whether host name and port are in the URL. + mutable bool m_100_continue_sent = false; ///< Whether ATS sent a 100 Continue optimized response. + mutable bool m_100_continue_required = false; ///< Whether 100_continue is in the Expect header. /// Set if the port was effectively specified in the header. /// @c true if the target (in the URL or the HOST field) also specified /// a port. That is, @c true if whatever source had the target host @@ -732,8 +734,13 @@ HTTPHdr::print(char *buf, int bufsize, int *bufindex, int *dumpoffset) const inline void HTTPHdr::_test_and_fill_target_cache() const { - if (!m_target_cached) + // Check if cache is stale: either not cached, or the Host header value has changed. + // We check both pointer and length to detect modifications even in the unlikely case + // where heap compaction causes a new allocation at the same address. + if (!m_target_cached || + (m_host_mime && (m_host_mime->m_ptr_value != m_host_ptr || m_host_mime->m_len_value != m_host_value_len))) { this->_fill_target_cache(); + } } /*------------------------------------------------------------------------- diff --git a/src/proxy/hdrs/HTTP.cc b/src/proxy/hdrs/HTTP.cc index f0db0665e7f..079c7cc2507 100644 --- a/src/proxy/hdrs/HTTP.cc +++ b/src/proxy/hdrs/HTTP.cc @@ -1640,6 +1640,8 @@ HTTPHdr::_fill_target_cache() const m_target_in_url = false; m_port_in_header = false; m_host_mime = nullptr; + m_host_ptr = nullptr; + m_host_value_len = 0; // Check in the URL first, then the HOST field. std::string_view host; if (host = url->host_get(); nullptr != host.data()) { @@ -1654,7 +1656,10 @@ HTTPHdr::_fill_target_cache() const m_host_length = static_cast(host.length()); if (m_host_mime != nullptr) { - m_port = 0; + // Cache pointer and length for staleness detection - if either changes, the value was modified. + m_host_ptr = m_host_mime->m_ptr_value; + m_host_value_len = m_host_mime->m_len_value; + m_port = 0; if (!port.empty()) { for (auto c : port) { if (isdigit(c)) { diff --git a/src/proxy/hdrs/unit_tests/test_Hdrs.cc b/src/proxy/hdrs/unit_tests/test_Hdrs.cc index 32482576e7d..fef0166d499 100644 --- a/src/proxy/hdrs/unit_tests/test_Hdrs.cc +++ b/src/proxy/hdrs/unit_tests/test_Hdrs.cc @@ -2104,3 +2104,367 @@ TEST_CASE("HdrTest", "[proxy][hdrtest]") } } } + +// Test that HTTPHdr::host_get() returns correct length after Host header is modified. +// This reproduces a bug where the cached host length was stale after the Host header +// was changed via MIME layer functions, causing host_get() to return a string_view +// with the wrong length (reading garbage bytes past the actual hostname). +TEST_CASE("HdrTestHostCacheInvalidation", "[proxy][hdrtest]") +{ + hdrtoken_init(); + url_init(); + mime_init(); + http_init(); + + // Test case: modify Host header to a shorter value, verify host_get() returns correct length + SECTION("Host header modification invalidates cache") + { + static const char request[] = { + "GET / HTTP/1.1\r\n" + "Host: very.long.hostname.example.com\r\n" + "\r\n", + }; + + HTTPHdr req_hdr; + HTTPParser parser; + const char *start = request; + const char *end = start + strlen(start); + + http_parser_init(&parser); + req_hdr.create(HTTPType::REQUEST); + + ParseResult err; + while (true) { + err = req_hdr.parse_req(&parser, &start, end, true); + if (err != ParseResult::CONT) { + break; + } + } + REQUIRE(err == ParseResult::DONE); + http_parser_clear(&parser); + + // First call to host_get() - this populates the cache + std::string_view host1 = req_hdr.host_get(); + REQUIRE(host1 == "very.long.hostname.example.com"sv); + REQUIRE(host1.length() == 30); + + // Now modify the Host header to a SHORTER value via MIME layer + MIMEField *host_field = req_hdr.field_find("Host"sv); + REQUIRE(host_field != nullptr); + + // Set to a shorter hostname - this is what plugins do + host_field->value_set(req_hdr.m_heap, req_hdr.m_mime, "short.com"sv); + + // Second call to host_get() - should return the NEW, SHORTER hostname + // BUG: Without the fix, this returns a string_view with the old cached length (30) + // but pointing to the new value buffer, causing it to read garbage past "short.com" + std::string_view host2 = req_hdr.host_get(); + + // This is the key assertion that fails without the fix: + // The length should be 9 ("short.com"), not 30 (the old cached length) + CHECK(host2.length() == 9); + CHECK(host2 == "short.com"sv); + + req_hdr.destroy(); + } + + // Test case: modify Host header to a longer value + SECTION("Host header modification to longer value") + { + static const char request[] = { + "GET / HTTP/1.1\r\n" + "Host: short.com\r\n" + "\r\n", + }; + + HTTPHdr req_hdr; + HTTPParser parser; + const char *start = request; + const char *end = start + strlen(start); + + http_parser_init(&parser); + req_hdr.create(HTTPType::REQUEST); + + ParseResult err; + while (true) { + err = req_hdr.parse_req(&parser, &start, end, true); + if (err != ParseResult::CONT) { + break; + } + } + REQUIRE(err == ParseResult::DONE); + http_parser_clear(&parser); + + // First call to host_get() - populates the cache + std::string_view host1 = req_hdr.host_get(); + REQUIRE(host1 == "short.com"sv); + + // Modify to a longer hostname + MIMEField *host_field = req_hdr.field_find("Host"sv); + REQUIRE(host_field != nullptr); + host_field->value_set(req_hdr.m_heap, req_hdr.m_mime, "very.long.hostname.example.com"sv); + + // Should return the new longer hostname + std::string_view host2 = req_hdr.host_get(); + CHECK(host2.length() == 30); + CHECK(host2 == "very.long.hostname.example.com"sv); + + req_hdr.destroy(); + } + + // Test case: multiple modifications + SECTION("Multiple Host header modifications") + { + static const char request[] = { + "GET / HTTP/1.1\r\n" + "Host: first.example.com\r\n" + "\r\n", + }; + + HTTPHdr req_hdr; + HTTPParser parser; + const char *start = request; + const char *end = start + strlen(start); + + http_parser_init(&parser); + req_hdr.create(HTTPType::REQUEST); + + ParseResult err; + while (true) { + err = req_hdr.parse_req(&parser, &start, end, true); + if (err != ParseResult::CONT) { + break; + } + } + REQUIRE(err == ParseResult::DONE); + http_parser_clear(&parser); + + MIMEField *host_field = req_hdr.field_find("Host"sv); + REQUIRE(host_field != nullptr); + + // Initial + std::string_view host = req_hdr.host_get(); + REQUIRE(host == "first.example.com"sv); + + // Modification 1 + host_field->value_set(req_hdr.m_heap, req_hdr.m_mime, "second.com"sv); + host = req_hdr.host_get(); + CHECK(host == "second.com"sv); + + // Modification 2 + host_field->value_set(req_hdr.m_heap, req_hdr.m_mime, "third.very.long.example.org"sv); + host = req_hdr.host_get(); + CHECK(host == "third.very.long.example.org"sv); + + // Modification 3 - back to short + host_field->value_set(req_hdr.m_heap, req_hdr.m_mime, "x.co"sv); + host = req_hdr.host_get(); + CHECK(host == "x.co"sv); + + req_hdr.destroy(); + } + + // Test case: Host header with port - incoming request has port + SECTION("Host header with port - modify hostname only") + { + static const char request[] = { + "GET / HTTP/1.1\r\n" + "Host: original.example.com:8080\r\n" + "\r\n", + }; + + HTTPHdr req_hdr; + HTTPParser parser; + const char *start = request; + const char *end = start + strlen(start); + + http_parser_init(&parser); + req_hdr.create(HTTPType::REQUEST); + + ParseResult err; + while (true) { + err = req_hdr.parse_req(&parser, &start, end, true); + if (err != ParseResult::CONT) { + break; + } + } + REQUIRE(err == ParseResult::DONE); + http_parser_clear(&parser); + + // First call - populates the cache + std::string_view host1 = req_hdr.host_get(); + int port1 = req_hdr.port_get(); + REQUIRE(host1 == "original.example.com"sv); + REQUIRE(port1 == 8080); + + // Now modify the Host header to a shorter value with different port + MIMEField *host_field = req_hdr.field_find("Host"sv); + REQUIRE(host_field != nullptr); + + host_field->value_set(req_hdr.m_heap, req_hdr.m_mime, "short.com:9090"sv); + + // Should return the new hostname and port + std::string_view host2 = req_hdr.host_get(); + int port2 = req_hdr.port_get(); + CHECK(host2 == "short.com"sv); + CHECK(port2 == 9090); + + req_hdr.destroy(); + } + + // Test case: Host header without port - add port + SECTION("Host header without port - add port") + { + static const char request[] = { + "GET / HTTP/1.1\r\n" + "Host: original.example.com\r\n" + "\r\n", + }; + + HTTPHdr req_hdr; + HTTPParser parser; + const char *start = request; + const char *end = start + strlen(start); + + http_parser_init(&parser); + req_hdr.create(HTTPType::REQUEST); + + ParseResult err; + while (true) { + err = req_hdr.parse_req(&parser, &start, end, true); + if (err != ParseResult::CONT) { + break; + } + } + REQUIRE(err == ParseResult::DONE); + http_parser_clear(&parser); + + // First call - populates the cache + // Note: port is 0 when no port in Host header and no URL scheme + std::string_view host1 = req_hdr.host_get(); + int port1 = req_hdr.port_get(); + REQUIRE(host1 == "original.example.com"sv); + REQUIRE(port1 == 0); // no port specified, no scheme to default from + + // Modify to add a port + MIMEField *host_field = req_hdr.field_find("Host"sv); + REQUIRE(host_field != nullptr); + + host_field->value_set(req_hdr.m_heap, req_hdr.m_mime, "newhost.com:3128"sv); + + // Should return the new hostname and port + std::string_view host2 = req_hdr.host_get(); + int port2 = req_hdr.port_get(); + CHECK(host2 == "newhost.com"sv); + CHECK(port2 == 3128); + + req_hdr.destroy(); + } + + // Test case: Host header with port - remove port + SECTION("Host header with port - remove port") + { + static const char request[] = { + "GET / HTTP/1.1\r\n" + "Host: original.example.com:8080\r\n" + "\r\n", + }; + + HTTPHdr req_hdr; + HTTPParser parser; + const char *start = request; + const char *end = start + strlen(start); + + http_parser_init(&parser); + req_hdr.create(HTTPType::REQUEST); + + ParseResult err; + while (true) { + err = req_hdr.parse_req(&parser, &start, end, true); + if (err != ParseResult::CONT) { + break; + } + } + REQUIRE(err == ParseResult::DONE); + http_parser_clear(&parser); + + // First call - populates the cache + std::string_view host1 = req_hdr.host_get(); + int port1 = req_hdr.port_get(); + REQUIRE(host1 == "original.example.com"sv); + REQUIRE(port1 == 8080); + + // Modify to remove the port + MIMEField *host_field = req_hdr.field_find("Host"sv); + REQUIRE(host_field != nullptr); + + host_field->value_set(req_hdr.m_heap, req_hdr.m_mime, "noport.example.org"sv); + + // Should return the new hostname + // Note: port is 0 when no port in Host header and no URL scheme + std::string_view host2 = req_hdr.host_get(); + int port2 = req_hdr.port_get(); + CHECK(host2 == "noport.example.org"sv); + CHECK(port2 == 0); // no port specified, no scheme to default from + + req_hdr.destroy(); + } + + // Test case: Multiple modifications with varying ports + SECTION("Multiple Host modifications with ports") + { + static const char request[] = { + "GET / HTTP/1.1\r\n" + "Host: first.com:1111\r\n" + "\r\n", + }; + + HTTPHdr req_hdr; + HTTPParser parser; + const char *start = request; + const char *end = start + strlen(start); + + http_parser_init(&parser); + req_hdr.create(HTTPType::REQUEST); + + ParseResult err; + while (true) { + err = req_hdr.parse_req(&parser, &start, end, true); + if (err != ParseResult::CONT) { + break; + } + } + REQUIRE(err == ParseResult::DONE); + http_parser_clear(&parser); + + MIMEField *host_field = req_hdr.field_find("Host"sv); + REQUIRE(host_field != nullptr); + + // Initial + CHECK(req_hdr.host_get() == "first.com"sv); + CHECK(req_hdr.port_get() == 1111); + + // Modification 1: change host and port + host_field->value_set(req_hdr.m_heap, req_hdr.m_mime, "second.com:2222"sv); + CHECK(req_hdr.host_get() == "second.com"sv); + CHECK(req_hdr.port_get() == 2222); + + // Modification 2: longer host, remove port + // Note: port is 0 when no port in Host header and no URL scheme + host_field->value_set(req_hdr.m_heap, req_hdr.m_mime, "very.long.third.example.org"sv); + CHECK(req_hdr.host_get() == "very.long.third.example.org"sv); + CHECK(req_hdr.port_get() == 0); + + // Modification 3: short host, add port back + host_field->value_set(req_hdr.m_heap, req_hdr.m_mime, "x.co:443"sv); + CHECK(req_hdr.host_get() == "x.co"sv); + CHECK(req_hdr.port_get() == 443); + + // Modification 4: change just the port + host_field->value_set(req_hdr.m_heap, req_hdr.m_mime, "x.co:8443"sv); + CHECK(req_hdr.host_get() == "x.co"sv); + CHECK(req_hdr.port_get() == 8443); + + req_hdr.destroy(); + } +} From 8e708ee1db82e525b7dcf3c9ac6a1737e8e11046 Mon Sep 17 00:00:00 2001 From: Bryan Call Date: Wed, 7 Jan 2026 12:03:13 -0800 Subject: [PATCH 2/2] Fix: HTTPHdr host cache invalidation without changing struct size Use calculated expected length for staleness detection instead of storing additional pointer/length fields. This avoids changing sizeof(HTTPHdr) which would break cache compatibility. The staleness check now computes the expected Host header value length from the cached m_host_length plus port digits (if m_port_in_header is set) and compares against m_host_mime->m_len_value. Added warning comment about cache compatibility implications of changing HTTPHdr size. Co-authored-by: Brian Neradt --- include/proxy/hdrs/HTTP.h | 60 ++++++++++++++++++++++++++++----------- src/proxy/hdrs/HTTP.cc | 7 +---- 2 files changed, 45 insertions(+), 22 deletions(-) diff --git a/include/proxy/hdrs/HTTP.h b/include/proxy/hdrs/HTTP.h index c7c0f65a618..dc2d7b61f7f 100644 --- a/include/proxy/hdrs/HTTP.h +++ b/include/proxy/hdrs/HTTP.h @@ -444,20 +444,27 @@ bool is_http1_hdr_version_supported(const HTTPVersion &http_version); class IOBufferReader; +/** HTTP Header class. + * + * @warning Changing the size of this class (adding/removing fields) will change + * the on-disk cache format and cause cache incompatibility. The HTTPCacheAlt + * structure contains embedded HTTPHdr objects, and the cache marshalling code + * uses sizeof(HTTPCacheAlt) to read/write cache entries. Any size change will + * cause "vector inconsistency" errors when reading cache entries written by a + * different version. + */ class HTTPHdr : public MIMEHdr { public: - HTTPHdrImpl *m_http = nullptr; - mutable URL m_url_cached; - mutable MIMEField *m_host_mime = nullptr; - mutable const char *m_host_ptr = nullptr; ///< Cached host value pointer for staleness detection. - mutable int m_host_value_len = 0; ///< Cached raw Host header value length for staleness detection. - mutable int m_host_length = 0; ///< Length of hostname (parsed, excludes port). - mutable int m_port = 0; ///< Target port. - mutable bool m_target_cached = false; ///< Whether host name and port are cached. - mutable bool m_target_in_url = false; ///< Whether host name and port are in the URL. - mutable bool m_100_continue_sent = false; ///< Whether ATS sent a 100 Continue optimized response. - mutable bool m_100_continue_required = false; ///< Whether 100_continue is in the Expect header. + HTTPHdrImpl *m_http = nullptr; + mutable URL m_url_cached; + mutable MIMEField *m_host_mime = nullptr; + mutable int m_host_length = 0; ///< Length of hostname (parsed, excludes port). + mutable int m_port = 0; ///< Target port. + mutable bool m_target_cached = false; ///< Whether host name and port are cached. + mutable bool m_target_in_url = false; ///< Whether host name and port are in the URL. + mutable bool m_100_continue_sent = false; ///< Whether ATS sent a 100 Continue optimized response. + mutable bool m_100_continue_required = false; ///< Whether 100_continue is in the Expect header. /// Set if the port was effectively specified in the header. /// @c true if the target (in the URL or the HOST field) also specified /// a port. That is, @c true if whatever source had the target host @@ -734,12 +741,33 @@ HTTPHdr::print(char *buf, int bufsize, int *bufindex, int *dumpoffset) const inline void HTTPHdr::_test_and_fill_target_cache() const { - // Check if cache is stale: either not cached, or the Host header value has changed. - // We check both pointer and length to detect modifications even in the unlikely case - // where heap compaction causes a new allocation at the same address. - if (!m_target_cached || - (m_host_mime && (m_host_mime->m_ptr_value != m_host_ptr || m_host_mime->m_len_value != m_host_value_len))) { + if (!m_target_cached) { this->_fill_target_cache(); + return; + } + + // If host came from the Host header (not URL), check for staleness by verifying + // the current Host header value length matches what we expect from cached values. + if (!m_target_in_url && m_host_mime != nullptr) { + int expected_len = m_host_length; + if (m_port_in_header && m_port > 0) { + // Account for ":port" suffix in the raw Host header value. + expected_len += 1; // colon + if (m_port < 10) { + expected_len += 1; + } else if (m_port < 100) { + expected_len += 2; + } else if (m_port < 1000) { + expected_len += 3; + } else if (m_port < 10000) { + expected_len += 4; + } else { + expected_len += 5; + } + } + if (m_host_mime->m_len_value != expected_len) { + this->_fill_target_cache(); + } } } diff --git a/src/proxy/hdrs/HTTP.cc b/src/proxy/hdrs/HTTP.cc index 079c7cc2507..f0db0665e7f 100644 --- a/src/proxy/hdrs/HTTP.cc +++ b/src/proxy/hdrs/HTTP.cc @@ -1640,8 +1640,6 @@ HTTPHdr::_fill_target_cache() const m_target_in_url = false; m_port_in_header = false; m_host_mime = nullptr; - m_host_ptr = nullptr; - m_host_value_len = 0; // Check in the URL first, then the HOST field. std::string_view host; if (host = url->host_get(); nullptr != host.data()) { @@ -1656,10 +1654,7 @@ HTTPHdr::_fill_target_cache() const m_host_length = static_cast(host.length()); if (m_host_mime != nullptr) { - // Cache pointer and length for staleness detection - if either changes, the value was modified. - m_host_ptr = m_host_mime->m_ptr_value; - m_host_value_len = m_host_mime->m_len_value; - m_port = 0; + m_port = 0; if (!port.empty()) { for (auto c : port) { if (isdigit(c)) {