diff --git a/src/workerd/api/cache.c++ b/src/workerd/api/cache.c++ index 8245e1f0d91..855965880fe 100644 --- a/src/workerd/api/cache.c++ +++ b/src/workerd/api/cache.c++ @@ -255,9 +255,9 @@ jsg::Promise Cache::put(jsg::Lock& js, "Cannot cache response to a range request (206 Partial Content)."); auto responseHeadersRef = jsResponse->getHeaders(js); - auto cacheControl = responseHeadersRef->getNoChecks(js, "cache-control"_kj); + auto cacheControl = responseHeadersRef->getCommon(js, capnp::CommonHeaderName::CACHE_CONTROL); - KJ_IF_SOME(vary, responseHeadersRef->getNoChecks(js, "vary"_kj)) { + KJ_IF_SOME(vary, responseHeadersRef->getCommon(js, capnp::CommonHeaderName::VARY)) { JSG_REQUIRE(vary.findFirst('*') == kj::none, TypeError, "Cannot cache response with 'Vary: *' header."); } @@ -532,7 +532,7 @@ kj::Own Cache::getHttpClient(IoContext& context, kj::Maybe cfBlobJson, kj::LiteralStringConst operationName, kj::StringPtr url, - kj::Maybe cacheControl, + kj::Maybe cacheControl, bool enableCompatFlags) { auto span = context.makeTraceSpan(operationName); auto userSpan = context.makeUserTraceSpan(operationName); diff --git a/src/workerd/api/cache.h b/src/workerd/api/cache.h index 64a14f5a1ed..492a31d6f66 100644 --- a/src/workerd/api/cache.h +++ b/src/workerd/api/cache.h @@ -93,7 +93,7 @@ class Cache: public jsg::Object { kj::Maybe cfBlobJson, kj::LiteralStringConst operationName, kj::StringPtr url, - kj::Maybe cacheControl, + kj::Maybe cacheControl, bool enableCompatFlags); }; diff --git a/src/workerd/api/eventsource.c++ b/src/workerd/api/eventsource.c++ index dfce6b099bf..e988ad6920f 100644 --- a/src/workerd/api/eventsource.c++ +++ b/src/workerd/api/eventsource.c++ @@ -362,10 +362,8 @@ void EventSource::start(jsg::Lock& js) { js, self, kj::str("The response status code was ", response->getStatus(), ".")); } - // TODO(cleanup): Using jsg::ByteString here is really annoying. It would be nice to have - // an internal alternative that doesn't require an allocation. 
KJ_IF_SOME(contentType, - response->getHeaders(js)->get(js, jsg::ByteString(kj::str("content-type")))) { + response->getHeaders(js)->getCommon(js, capnp::CommonHeaderName::CONTENT_TYPE)) { bool invalid = false; KJ_IF_SOME(parsed, MimeType::tryParse(contentType)) { invalid = parsed != MimeType::EVENT_STREAM; @@ -421,12 +419,10 @@ void EventSource::start(jsg::Lock& js) { }); auto headers = js.alloc(); - headers->set( - js, jsg::ByteString(kj::str("accept")), jsg::ByteString(MimeType::EVENT_STREAM.essence())); - headers->set(js, jsg::ByteString(kj::str("cache-control")), jsg::ByteString(kj::str("no-cache"))); + headers->setCommon(capnp::CommonHeaderName::ACCEPT, MimeType::EVENT_STREAM.essence()); + headers->setCommon(capnp::CommonHeaderName::CACHE_CONTROL, kj::str("no-cache")); if (lastEventId != ""_kjc) { - headers->set( - js, jsg::ByteString(kj::str("last-event-id")), jsg::ByteString(kj::str(lastEventId))); + headers->setUnguarded(js, kj::str("last-event-id"), kj::str(lastEventId)); } fetchImpl(js, kj::mv(fetcher), kj::str(i.url), diff --git a/src/workerd/api/global-scope.c++ b/src/workerd/api/global-scope.c++ index 3c5f9d0c489..bb47a718345 100644 --- a/src/workerd/api/global-scope.c++ +++ b/src/workerd/api/global-scope.c++ @@ -165,8 +165,6 @@ kj::Promise> ServiceWorkerGlobalScope::request(kj::HttpMetho CfProperty cf(cfBlobJson); - auto jsHeaders = js.alloc(js, headers, Headers::Guard::REQUEST); - // We only create the body stream if there is a body to read. kj::Maybe> maybeJsStream = kj::none; @@ -190,9 +188,10 @@ kj::Promise> ServiceWorkerGlobalScope::request(kj::HttpMetho // // TODO(cleanup): Should KJ HTTP interfaces explicitly communicate the difference between a // missing body and an empty one? 
+ auto newHeaders = headers.cloneShallow(); kj::Maybe body; - if (headers.get(kj::HttpHeaderId::CONTENT_LENGTH) != kj::none || - headers.get(kj::HttpHeaderId::TRANSFER_ENCODING) != kj::none || + if (newHeaders.get(kj::HttpHeaderId::CONTENT_LENGTH) != kj::none || + newHeaders.get(kj::HttpHeaderId::TRANSFER_ENCODING) != kj::none || requestBody.tryGetLength().orDefault(1) > 0) { // We do not automatically decode gzipped request bodies because the fetch() standard doesn't // specify any automatic encoding of requests. https://github.com/whatwg/fetch/issues/589 @@ -205,16 +204,13 @@ kj::Promise> ServiceWorkerGlobalScope::request(kj::HttpMetho // If the request doesn't specify "Content-Length" or "Transfer-Encoding", set "Content-Length" // to the body length if it's known. This ensures handlers for worker-to-worker requests can // access known body lengths if they're set, without buffering bodies. - if (body != kj::none && headers.get(kj::HttpHeaderId::CONTENT_LENGTH) == kj::none && - headers.get(kj::HttpHeaderId::TRANSFER_ENCODING) == kj::none) { - // We can't use headers.set() here as headers is marked const. Instead, we call set() on the - // JavaScript headers object, ignoring the REQUEST guard that usually makes them immutable. 
+ // TODO(cleanup): It would be nice if kj::HttpHeaders had an inlined has method + if (body != kj::none && newHeaders.get(kj::HttpHeaderId::CONTENT_LENGTH) == kj::none && + newHeaders.get(kj::HttpHeaderId::TRANSFER_ENCODING) == kj::none) { KJ_IF_SOME(l, requestBody.tryGetLength()) { - jsHeaders->setUnguarded( - js, jsg::ByteString(kj::str("Content-Length")), jsg::ByteString(kj::str(l))); + newHeaders.set(kj::HttpHeaderId::CONTENT_LENGTH, kj::str(l)); } else { - jsHeaders->setUnguarded( - js, jsg::ByteString(kj::str("Transfer-Encoding")), jsg::ByteString(kj::str("chunked"))); + newHeaders.setPtr(kj::HttpHeaderId::TRANSFER_ENCODING, "chunked"); } } @@ -228,7 +224,8 @@ kj::Promise> ServiceWorkerGlobalScope::request(kj::HttpMetho js.alloc(IoContext::NEXT_CLIENT_CHANNEL, Fetcher::RequiresHostAndProtocol::YES); } - auto jsRequest = js.alloc(js, method, url, Request::Redirect::MANUAL, kj::mv(jsHeaders), + auto jsRequest = js.alloc(js, method, url, Request::Redirect::MANUAL, + js.alloc(js, newHeaders, Headers::Guard::REQUEST), KJ_ASSERT_NONNULL(defaultFetcher).addRef(), /* signal */ kj::mv(abortSignal), kj::mv(cf), kj::mv(body), /* thisSignal */ kj::none, Request::CacheMode::NONE); diff --git a/src/workerd/api/headers.c++ b/src/workerd/api/headers.c++ new file mode 100644 index 00000000000..33ecf6389aa --- /dev/null +++ b/src/workerd/api/headers.c++ @@ -0,0 +1,728 @@ +#include "headers.h" + +#include "simdutf.h" +#include "util.h" + +#include +#include +#include +#include + +namespace workerd::api { + +namespace { +// If any more headers are added to the CommonHeaderName enum later, we should be careful about +// introducing them into serialization. We need to roll out a change that recognizes the new IDs +// before rolling out a change that sends them. MAX_COMMON_HEADER_ID is the max value we're willing +// to send. 
+constexpr size_t MAX_COMMON_HEADER_ID = + static_cast(capnp::CommonHeaderName::WWW_AUTHENTICATE); + +#define COMMON_HEADERS(V) \ + V("accept-charset") \ + V("accept-encoding") \ + V("accept-language") \ + V("accept-ranges") \ + V("accept") \ + V("access-control-allow-origin") \ + V("age") \ + V("allow") \ + V("authorization") \ + V("cache-control") \ + V("content-disposition") \ + V("content-encoding") \ + V("content-language") \ + V("content-length") \ + V("content-location") \ + V("content-range") \ + V("content-type") \ + V("cookie") \ + V("date") \ + V("etag") \ + V("expect") \ + V("expires") \ + V("from") \ + V("host") \ + V("if-match") \ + V("if-modified-since") \ + V("if-none-match") \ + V("if-range") \ + V("if-unmodified-since") \ + V("last-modified") \ + V("link") \ + V("location") \ + V("max-forwards") \ + V("proxy-authenticate") \ + V("proxy-authorization") \ + V("range") \ + V("referer") \ + V("refresh") \ + V("retry-after") \ + V("server") \ + V("set-cookie") \ + V("strict-transport-security") \ + V("transfer-encoding") \ + V("user-agent") \ + V("vary") \ + V("via") \ + V("www-authenticate") + +// Constexpr array of lowercase common header names (must match CommonHeaderName enum order +// and must be kept in sync with the ordinal values defined in http-over-capnp.capnp). Since +// it is extremely unlikely that those will change often, we hardcode them here for runtime +// efficiency. 
+#define V(Name) Name##_kj, +constexpr kj::StringPtr COMMON_HEADER_NAMES[] = {nullptr, // 0: invalid + COMMON_HEADERS(V)}; +#undef V + +inline constexpr kj::StringPtr getCommonHeaderName(uint id) { + KJ_ASSERT(id > 0 && id <= MAX_COMMON_HEADER_ID, "Invalid common header ID"); + return COMMON_HEADER_NAMES[id]; +} + +constexpr bool strcaseeq(kj::StringPtr a, kj::StringPtr b) { + if (a.size() != b.size()) return false; + for (size_t i = 0; i < a.size(); ++i) { + char ca = a[i]; + char cb = b[i]; + // Convert to lowercase for comparison + if ('A' <= ca && ca <= 'Z') ca += 32; + if ('A' <= cb && cb <= 'Z') cb += 32; + if (ca != cb) return false; + } + return true; +} + +constexpr uint caseInsensitiveHash(kj::StringPtr name) { + uint hash = 2166136261u; + for (size_t i = 0; i < name.size(); ++i) { + char c = name[i]; + if ('A' <= c && c <= 'Z') c += 32; + hash ^= static_cast(c); + hash *= 16777619u; + } + return hash; +} + +constexpr size_t HEADER_MAP_SIZE = 128; + +// Constexpr hash table for case-insensitive mapping of header names to their +// common header id (if any). 
+struct HeaderHashTable final { + struct Entry { + kj::StringPtr name; + uint id; + }; + + Entry entries[HEADER_MAP_SIZE] = {}; + + constexpr HeaderHashTable() { + for (size_t i = 0; i < HEADER_MAP_SIZE; ++i) { + entries[i] = {nullptr, 0}; + } + + for (uint i = 1; i <= MAX_COMMON_HEADER_ID; ++i) { + auto name = COMMON_HEADER_NAMES[i]; + size_t slot = caseInsensitiveHash(name) % HEADER_MAP_SIZE; + while (entries[slot].id != 0) { + slot = (slot + 1) % HEADER_MAP_SIZE; + } + entries[slot] = {name, i}; + } + } + + constexpr uint find(kj::StringPtr name) const { + if (name == nullptr) return 0; + + size_t slot = caseInsensitiveHash(name) % HEADER_MAP_SIZE; + + // Linear probe until we find a match or empty slot + for (size_t probes = 0; probes < HEADER_MAP_SIZE; ++probes) { + const auto& entry = entries[slot]; + if (entry.id == 0) return 0; + if (entry.name.size() == name.size() && strcaseeq(entry.name, name)) { + return entry.id; + } + slot = (slot + 1) % HEADER_MAP_SIZE; + } + return 0; // Not found + } +}; + +constexpr HeaderHashTable HEADER_HASH_TABLE; +// Quick check to verify that the hash table is constructed correctly. +static_assert(HEADER_HASH_TABLE.find("accept-charset"_kj) == 1); +static_assert(HEADER_HASH_TABLE.find("AcCePt-ChArSeT"_kj) == 1); + +static_assert(std::size(COMMON_HEADER_NAMES) == (MAX_COMMON_HEADER_ID + 1)); + +void maybeWarnIfBadHeaderString(kj::StringPtr str) { + if (IoContext::hasCurrent()) { + auto& context = IoContext::current(); + if (context.isInspectorEnabled()) { + if (!simdutf::validate_ascii(str.begin(), str.size())) { + // The string contains non-ASCII characters. While any 8-bit value is technically valid + // in HTTP headers, we encode header strings as UTF-8, so we want to warn the user that + // their header name/value may not be what they may expect based on what browsers do. 
+ auto utf8Hex = + kj::strArray(KJ_MAP(b, str) { return kj::str("\\x", kj::hex(kj::byte(b))); }, ""); + context.logWarning(kj::str("A header value contains non-ASCII characters: \"", str, + "\" (raw bytes: \"", utf8Hex, + "\"). As a quirk to support Unicode, we are encoding " + "values as UTF-8 in the header, but in a browser this would likely result in a " + "TypeError exception. Consider encoding this string in ASCII for compatibility with " + "browser implementations of the Fetch specification.")); + } + } + } +} + +// Left- and right-trim HTTP whitespace from `value`. +kj::String normalizeHeaderValue(kj::String value) { + // Fast path: if empty, return as-is + if (value.size() == 0) return kj::mv(value); + + char* begin = value.begin(); + char* end = value.end(); + + while (begin < end && util::isHttpWhitespace(*begin)) ++begin; + while (begin < end && util::isHttpWhitespace(*(end - 1))) --end; + + size_t newSize = end - begin; + if (newSize == value.size()) { + JSG_REQUIRE(workerd::util::isValidHeaderValue(value), TypeError, "Invalid header value."); + maybeWarnIfBadHeaderString(value); + return kj::mv(value); + } + + auto trimmed = kj::ArrayPtr(begin, newSize); + JSG_REQUIRE(workerd::util::isValidHeaderValue(trimmed), TypeError, "Invalid header value."); + maybeWarnIfBadHeaderString(value); + // By attaching the original array to the trimmed view, we keep the original allocation alive + // and prevent an unnecessary copy. + return kj::str(trimmed.attach(value.releaseArray())); +} + +constexpr bool isSetCookie(const Headers::HeaderKey& key) { + KJ_SWITCH_ONEOF(key) { + KJ_CASE_ONEOF(commonId, uint) { + return commonId == static_cast(capnp::CommonHeaderName::SET_COOKIE); + } + KJ_CASE_ONEOF(uncommonKey, kj::String) { + // This case really shouldn't happen since "set-cookie" is a common header, + // but just in case... 
+ return uncommonKey == "set-cookie"; + } + } + KJ_UNREACHABLE; +} + +constexpr Headers::HeaderKey getHeaderKeyFor(kj::StringPtr name) { + if (uint commonId = HEADER_HASH_TABLE.find(name)) { + KJ_DASSERT(commonId > 0 && commonId <= MAX_COMMON_HEADER_ID); + return commonId; + } + + for (char c: name) { + JSG_REQUIRE(util::isHttpTokenChar(c), TypeError, "Invalid header name."); + } + + // Not a common header, so allocate lowercase copy for uncommon header + return toLower(name); +} + +constexpr Headers::HeaderKey cloneHeaderKey(const Headers::HeaderKey& key) { + KJ_SWITCH_ONEOF(key) { + KJ_CASE_ONEOF(commonId, uint) { + return commonId; + } + KJ_CASE_ONEOF(uncommonKey, kj::String) { + return kj::str(uncommonKey); + } + } + KJ_UNREACHABLE; +} +} // namespace + +Headers::Header::Header(HeaderKey key, kj::Maybe name, kj::Vector values) + : key(kj::mv(key)), + name(kj::mv(name)), + values(kj::mv(values)) {} + +kj::StringPtr Headers::Header::Header::getKeyName() const { + KJ_SWITCH_ONEOF(key) { + KJ_CASE_ONEOF(commonId, uint) { + return COMMON_HEADER_NAMES[commonId]; + } + KJ_CASE_ONEOF(uncommonKey, kj::String) { + return uncommonKey; + } + } + KJ_UNREACHABLE; +} + +kj::StringPtr Headers::Header::getHeaderName() const { + KJ_IF_SOME(preservedName, name) { + return preservedName; + } + return getKeyName(); +} + +Headers::Header Headers::Header::clone() const { + return Header(cloneHeaderKey(key), name.map([](const kj::String& n) { return kj::str(n); }), + KJ_MAP(value, values) { return kj::str(value); }); +} + +bool Headers::HeaderCallbacks::matches(Header& header, const HeaderKey& other) { + return header.key == other; +} + +bool Headers::HeaderCallbacks::matches(Header& header, kj::StringPtr otherName) { + return matches(header, getHeaderKeyFor(otherName)); +} + +bool Headers::HeaderCallbacks::matches(Header& header, capnp::CommonHeaderName commondId) { + KJ_IF_SOME(headerCommonId, header.key.tryGet()) { + return headerCommonId == static_cast(commondId); + } + return 
false; +} + +kj::uint Headers::HeaderCallbacks::hashCode(const HeaderKey& key) { + KJ_SWITCH_ONEOF(key) { + KJ_CASE_ONEOF(commonId, uint) { + return kj::hashCode(commonId); + } + KJ_CASE_ONEOF(uncommonKey, kj::String) { + return kj::hashCode(uncommonKey); + } + } + KJ_UNREACHABLE; +} + +kj::uint Headers::HeaderCallbacks::hashCode(capnp::CommonHeaderName commondId) { + return kj::hashCode(commondId); +} + +Headers::Headers(jsg::Lock& js, jsg::Dict dict): guard(Guard::NONE) { + headers.reserve(dict.fields.size()); + for (auto& field: dict.fields) { + append(js, kj::mv(field.name), kj::mv(field.value)); + } +} + +Headers::Headers(jsg::Lock& js, const Headers& other): guard(Guard::NONE) { + headers.reserve(other.headers.size()); + for (auto& header: other.headers) { + // There really shouldn't be any duplicate headers in other, but just in case, use upsert + // and we'll just ignore duplicates. + headers.upsert(header.clone(), [](auto&, auto&&) {}); + } +} + +Headers::Headers(jsg::Lock& js, const kj::HttpHeaders& other, Guard guard): guard(Guard::NONE) { + headers.reserve(other.size()); + // TODO(perf): Once kj::HttpHeaders supports an API for getting the CommonHeaderName directly + // from the headers, we can optimize this to avoid looking up the common header IDs again, + // making this constructor more efficient when copying common headers from kj::HttpHeaders. + other.forEach([this, &js](auto name, auto value) { + // We have to copy the strings here but we can avoid normalizing and validating since + // they presumably already went through that process when they were added to the + // kj::HttpHeader instance. + appendUnguarded(js, kj::str(name), kj::str(value)); + }); + + this->guard = guard; +} + +jsg::Ref Headers::clone(jsg::Lock& js) const { + auto result = js.alloc(js, *this); + result->guard = guard; + return kj::mv(result); +} + +// Fill in the given HttpHeaders with these headers. 
Note that strings are inserted by +// reference, so the output must be consumed immediately. +void Headers::shallowCopyTo(kj::HttpHeaders& out) { + // TODO(perf): Once kj::HttpHeaders supports an API for setting headers by CommonHeaderName, + // we can optimize this to avoid the additional lookup of the header name and use of addPtrPtr. + for (auto& entry: headers) { + for (auto& value: entry.values) { + out.addPtrPtr(entry.getHeaderName(), value); + } + } +} + +bool Headers::hasLowerCase(kj::StringPtr name) { +#ifdef KJ_DEBUG + for (auto c: name) { + KJ_DREQUIRE(!('A' <= c && c <= 'Z')); + } +#endif + return headers.find(getHeaderKeyFor(name)) != kj::none; +} + +kj::Array Headers::getDisplayedHeaders(jsg::Lock& js) { + if (FeatureFlags::get(js).getHttpHeadersGetSetCookie()) { + kj::Vector vec; + size_t reserved = 0; + for (auto& header: headers) { + if (isSetCookie(header.key)) { + reserved += header.values.size(); + } else { + reserved += 1; + } + } + vec.reserve(reserved); + for (auto& header: headers) { + if (isSetCookie(header.key)) { + // For set-cookie entries, we iterate each individually without combining them. + for (auto& value: header.values) { + vec.add(Headers::DisplayedHeader{ + .key = kj::str(header.getKeyName()), + .value = kj::str(value), + }); + } + } else { + vec.add(Headers::DisplayedHeader{ + .key = kj::str(header.getKeyName()), + .value = kj::strArray(header.values, ", "), + }); + } + } + auto ret = vec.releaseAsArray(); + std::sort(ret.begin(), ret.end(), [](const auto& a, const auto& b) { return a.key < b.key; }); + return kj::mv(ret); + } else { + // The old behavior before the standard getSetCookie() API was introduced... 
+ kj::Vector vec(headers.size()); + for (auto& header: headers) { + vec.add(DisplayedHeader{ + .key = kj::str(header.getKeyName()), + .value = kj::strArray(header.values, ", "), + }); + } + auto ret = vec.releaseAsArray(); + std::sort(ret.begin(), ret.end(), [](const auto& a, const auto& b) { return a.key < b.key; }); + return kj::mv(ret); + } +} + +jsg::Ref Headers::constructor(jsg::Lock& js, jsg::Optional init) { + using StringDict = jsg::Dict; + + KJ_IF_SOME(i, init) { + KJ_SWITCH_ONEOF(kj::mv(i)) { + KJ_CASE_ONEOF(dict, StringDict) { + return js.alloc(js, kj::mv(dict)); + } + KJ_CASE_ONEOF(headers, jsg::Ref) { + return js.alloc(js, *headers); + // It's important to note that we are treating the Headers object + // as a special case here. Per the fetch spec, we *should* be grabbing + // the Symbol.iterator off the Headers object and interpreting it as + // a sequence of string sequences (as in the StringPairs case + // below). However, special casing Headers like we do here is more + // performant and has other side effects such as preserving the casing + // of header names that have been received. + // + // This does mean that we fail one of the more pathological (and kind + // of weird) Web Platform Tests for this API: + // + // const h = new Headers(); + // h[Symbol.iterator] = function * () { yield ["test", "test"]; }; + // const headers = new Headers(h); + // console.log(headers.has("test")); + // + // The spec would say headers.has("test") here should be true. With our + // implementation here, however, we are ignoring the Symbol.iterator so + // the test fails. 
+ } + KJ_CASE_ONEOF(pairs, StringPairs) { + auto dict = KJ_MAP(entry, pairs) { + JSG_REQUIRE(entry.size() == 2, TypeError, + "To initialize a Headers object from a sequence, each inner sequence " + "must have exactly two elements."); + return StringDict::Field{kj::mv(entry[0]), kj::mv(entry[1])}; + }; + return js.alloc(js, StringDict{kj::mv(dict)}); + } + } + } + + return js.alloc(); +} + +kj::Maybe Headers::get(jsg::Lock& js, kj::String name) { + return getUnguarded(js, name); +} + +kj::Maybe Headers::getUnguarded(jsg::Lock&, kj::StringPtr name) { + KJ_IF_SOME(found, headers.find(getHeaderKeyFor(name))) { + return kj::strArray(found.values, ", "); + } + return kj::none; +} + +kj::Maybe Headers::getCommon(jsg::Lock& js, capnp::CommonHeaderName idx) { + KJ_DASSERT(static_cast(idx) <= MAX_COMMON_HEADER_ID); + KJ_IF_SOME(found, headers.find(idx)) { + return kj::strArray(found.values, ", "); + } + return kj::none; +} + +kj::Array Headers::getSetCookie() { + KJ_IF_SOME(found, headers.find(capnp::CommonHeaderName::SET_COOKIE)) { + return KJ_MAP(value, found.values) { return value.asPtr(); }; + } + return nullptr; +} + +kj::Array Headers::getAll(kj::String name) { + if (!strcaseeq(name, "set-cookie"_kj)) { + JSG_FAIL_REQUIRE(TypeError, "getAll() can only be used with the header name \"Set-Cookie\"."); + } + + // getSetCookie() is the standard API here. getAll(...) is our legacy non-standard extension + // for the same use case. We continue to support getAll for backwards compatibility but moving + // forward users really should be using getSetCookie. 
+ return getSetCookie(); +} + +bool Headers::has(kj::String name) { + return headers.find(getHeaderKeyFor(name)) != kj::none; +} + +bool Headers::hasCommon(capnp::CommonHeaderName idx) { + KJ_DASSERT(static_cast(idx) <= MAX_COMMON_HEADER_ID); + return headers.find(idx) != kj::none; +} + +void Headers::set(jsg::Lock& js, kj::String name, kj::String value) { + checkGuard(); + setUnguarded(js, kj::mv(name), normalizeHeaderValue(kj::mv(value))); +} + +void Headers::setUnguarded(jsg::Lock& js, kj::String name, kj::String value) { + auto key = getHeaderKeyFor(name); + auto& header = headers.findOrCreate(key, [&]() { + Header header(kj::mv(key)); + auto keyName = header.getKeyName(); + if (keyName.size() != name.size() || keyName != name) { + header.name = kj::mv(name); + } + return kj::mv(header); + }); + header.values.resize(1); + header.values[0] = kj::mv(value); +} + +void Headers::setCommon(capnp::CommonHeaderName idx, kj::String value) { + KJ_DASSERT(static_cast(idx) <= MAX_COMMON_HEADER_ID); + HeaderKey key = static_cast(idx); + auto& header = headers.findOrCreate(key, [&]() { return Header(kj::mv(key)); }); + header.values.clear(); + header.values.add(kj::mv(value)); +} + +void Headers::append(jsg::Lock& js, kj::String name, kj::String value) { + checkGuard(); + appendUnguarded(js, kj::mv(name), normalizeHeaderValue(kj::mv(value))); +} + +void Headers::appendUnguarded(jsg::Lock& js, kj::String name, kj::String value) { + auto key = getHeaderKeyFor(name); + auto& header = headers.findOrCreate(key, [&]() { + Header header(kj::mv(key)); + auto keyName = header.getKeyName(); + if (keyName.size() != name.size() || keyName != name) { + header.name = kj::mv(name); + } + return kj::mv(header); + }); + header.values.add(kj::mv(value)); +} + +void Headers::delete_(kj::String name) { + checkGuard(); + headers.eraseMatch(getHeaderKeyFor(name)); +} + +void Headers::deleteCommon(capnp::CommonHeaderName idx) { + headers.eraseMatch(idx); +} + +// There are a couple implementation 
details of the Headers iterators worth calling out. +// +// 1. Each iterator gets its own copy of the keys and/or values of the headers. While nauseating +// from a performance perspective, this solves both the iterator -> iterable lifetime dependence +// and the iterator invalidation issue: i.e., it's impossible for a user to unsafely modify the +// Headers data structure while iterating over it, because they are simply two separate data +// structures. By empirical testing, this seems to be how Chrome implements Headers iteration. +// +// Other alternatives bring their own pitfalls. We could store a Ref of the parent Headers +// object, solving the lifetime issue. To solve the iterator invalidation issue, we could store a +// copy of the currently-iterated-over key and use std::upper_bound() to find the next entry +// every time we want to increment the iterator (making the increment operation O(lg n) rather +// than O(1)); or we could make each Header entry in the map store a set of back-pointers to all +// live iterators pointing to it, with delete_() incrementing all iterators in the set whenever +// it deletes a header entry. Neither hack appealed to me. +// +// 2. Notice that the next() member function of the iterator classes moves the string(s) they +// contain, rather than making a copy of them as in the FormData iterators. This is safe to do +// because, unlike FormData, these iterators have their own copies of the strings, and since they +// are forward-only iterators, we know we won't need the strings again. +// +// TODO(perf): On point 1, perhaps we could avoid most copies by using a copy-on-write strategy +// applied to the header map elements? We'd still copy the whole data structure to avoid iterator +// invalidation, but the elements would be cheaper to copy. 
+ +jsg::Ref Headers::entries(jsg::Lock& js) { + return js.alloc(IteratorState{getDisplayedHeaders(js)}); +} +jsg::Ref Headers::keys(jsg::Lock& js) { + if (FeatureFlags::get(js).getHttpHeadersGetSetCookie()) { + kj::Vector keysCopy; + for (auto& header: headers) { + // Set-Cookie headers must be handled specially. They should never be combined into a + // single value, so the values iterator must separate them. It seems a bit silly, but + // the keys iterator can end up having multiple set-cookie instances. + if (isSetCookie(header.key)) { + auto values = getSetCookie(); + for (auto n = 0; n < values.size(); n++) { + keysCopy.add(kj::str(header.getKeyName())); + } + } else { + keysCopy.add(kj::str(header.getKeyName())); + } + } + auto ret = keysCopy.releaseAsArray(); + std::sort(ret.begin(), ret.end(), [](const auto& a, const auto& b) { return a < b; }); + return js.alloc(IteratorState{kj::mv(ret)}); + } else { + auto keysCopy = KJ_MAP(header, headers) { return kj::str(header.getKeyName()); }; + std::sort(keysCopy.begin(), keysCopy.end(), [](const auto& a, const auto& b) { return a < b; }); + return js.alloc(IteratorState{kj::mv(keysCopy)}); + } +} +jsg::Ref Headers::values(jsg::Lock& js) { + // Annoyingly, the spec requires that the values iterator still be sorted by key. + // To make this easiest, let's grab the displayed headers and then extract the values. + // the getDisplayedHeaders() function does the sorting for us at the cost of an extra + // copy of the names. Fortunately, enumerating by value is likely way less common than + // other forms of iteration so the cost should be acceptable. 
+ auto headers = getDisplayedHeaders(js); + kj::Vector values(headers.size()); + for (auto& header: headers) { + values.add(kj::mv(header.value)); + }; + return js.alloc(IteratorState(values.releaseAsArray())); +} + +void Headers::forEach(jsg::Lock& js, + jsg::Function)> callback, + jsg::Optional thisArg) { + auto receiver = js.v8Undefined(); + KJ_IF_SOME(arg, thisArg) { + auto handle = arg.getHandle(js); + if (!handle->IsNullOrUndefined()) { + receiver = handle; + } + } + callback.setReceiver(js.v8Ref(receiver)); + + for (auto& entry: getDisplayedHeaders(js)) { + callback(js, entry.value, entry.key, JSG_THIS); + } +} + +bool Headers::inspectImmutable() { + return guard != Guard::NONE; +} + +void Headers::visitForMemoryInfo(jsg::MemoryTracker& tracker) const { + for (const auto& header: headers) { + tracker.trackField(nullptr, header); + } +} + +// ----------------------------------------------------------------------------- +// serialization of headers +// +// http-over-capnp.capnp has a nice list of common header names, taken from the HTTP/2 standard. +// We'll use it as an optimization. +// +// Note that using numeric IDs for headers implies we lose the original capitalization. However, +// the JS Headers API doesn't actually give the application any way to observe the capitalization +// of header names -- it only becomes relevant when serializing over HTTP/1.1. And at that point, +// we are actually free to change the capitalization anyway, and we commonly do (KJ itself will +// normalize capitalization of all registered headers, and http-over-capnp also loses +// capitalization). So, it's certainly not worth it to try to keep the original capitalization +// across serialization. + +void Headers::serialize(jsg::Lock& js, jsg::Serializer& serializer) { + // We serialize as a series of key-value pairs. Each value is a length-delimited string. 
Each key + // is a common header ID, or the value zero to indicate an uncommon header, which is then + // followed by a length-delimited name. + + serializer.writeRawUint32(static_cast(guard)); + + // Write the count of headers. + uint count = 0; + for (auto& entry: headers) { + count += entry.values.size(); + } + serializer.writeRawUint32(count); + + // Now write key/values. + for (auto& header: headers) { + for (auto& value: header.values) { + KJ_SWITCH_ONEOF(header.key) { + KJ_CASE_ONEOF(commonId, uint) { + serializer.writeRawUint32(commonId); + } + KJ_CASE_ONEOF(_, kj::String) { + serializer.writeRawUint32(0); + serializer.writeLengthDelimited(header.getHeaderName()); + } + } + serializer.writeLengthDelimited(value); + } + } +} + +jsg::Ref Headers::deserialize( + jsg::Lock& js, rpc::SerializationTag tag, jsg::Deserializer& deserializer) { + auto result = js.alloc(); + uint guard = deserializer.readRawUint32(); + KJ_REQUIRE(guard <= static_cast(Guard::NONE), "unknown guard value"); + + uint count = deserializer.readRawUint32(); + + for (auto i KJ_UNUSED: kj::zeroTo(count)) { + uint commonId = deserializer.readRawUint32(); + kj::String name; + if (commonId == 0) { + name = deserializer.readLengthDelimitedString(); + } else { + KJ_ASSERT(commonId <= MAX_COMMON_HEADER_ID); + name = kj::str(getCommonHeaderName(commonId)); + } + + auto value = deserializer.readLengthDelimitedString(); + + // TODO(performance): We can avoid some copies here by constructing the + // Header entry directly from the information read from the deserializer, + // without relying on append. + result->appendUnguarded(js, kj::mv(name), kj::mv(value)); + } + + // Don't actually set the guard until here because it may block the ability to call `append()`. 
+ result->guard = static_cast(guard); + + return result; +} + +} // namespace workerd::api diff --git a/src/workerd/api/headers.h b/src/workerd/api/headers.h new file mode 100644 index 00000000000..3c70cf7f0d9 --- /dev/null +++ b/src/workerd/api/headers.h @@ -0,0 +1,245 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace workerd::api { + +class Headers final: public jsg::Object { +private: + template + struct IteratorState { + kj::Array copy; + decltype(copy.begin()) cursor = copy.begin(); + }; + +public: + enum class Guard { + // WARNING: This type is serialized, do not change the numeric values. + IMMUTABLE = 0, + REQUEST = 1, + // REQUEST_NO_CORS, // CORS not relevant on server side + RESPONSE = 2, + NONE = 3 + }; + + struct DisplayedHeader { + kj::String key; // lower-cased name + kj::String value; // comma-concatenation of all values seen + }; + + Headers(): guard(Guard::NONE) {} + explicit Headers(jsg::Lock& js, jsg::Dict dict); + explicit Headers(jsg::Lock& js, const Headers& other); + explicit Headers(jsg::Lock& js, const kj::HttpHeaders& other, Guard guard); + KJ_DISALLOW_COPY_AND_MOVE(Headers); + + // Make a copy of this Headers object, and preserve the guard. + jsg::Ref clone(jsg::Lock& js) const; + + // Fill in the given HttpHeaders with these headers. Note that strings are inserted by + // reference, so the output must be consumed immediately. + void shallowCopyTo(kj::HttpHeaders& out); + + // Like has(), but only call this with an already-lower-case `name`. Useful to avoid an + // unnecessary string allocation. Not part of the JS interface. + bool hasLowerCase(kj::StringPtr name); + + // Returns headers with lower-case name and comma-concatenated duplicates. 
+ kj::Array getDisplayedHeaders(jsg::Lock& js); + + using StringPair = jsg::Sequence; + using StringPairs = jsg::Sequence; + + // Per the fetch specification, it is possible to initialize a Headers object + // from any other object that has a Symbol.iterator implementation. Those are + // handled in this Initializer definition using the StringPairs definition + // that aliases jsg::Sequence>. Technically, + // the Headers object itself falls under that definition as well. However, treating + // a Headers object as a jsg::Sequence> is nowhere near as + // performant and has the side effect of forcing all header names to be lower-cased + // rather than case-preserved. Instead of following the spec exactly here, we + // choose to special case creating a Header object from another Header object. + // This is an intentional departure from the spec. + using Initializer = kj::OneOf, + StringPairs, + jsg::Dict>; + + static jsg::Ref constructor(jsg::Lock& js, jsg::Optional init); + kj::Maybe get(jsg::Lock& js, kj::String name); + + // getAll is a legacy non-standard extension API that we introduced before + // getSetCookie() was defined. We continue to support it for backwards + // compatibility but users really ought to be using getSetCookie() now. + kj::Array getAll(kj::String name); + + // The Set-Cookie header is special in that it is the only HTTP header that + // is not permitted to be combined into a single instance. 
+ kj::Array getSetCookie(); + + bool has(kj::String name); + + void set(jsg::Lock& js, kj::String name, kj::String value); + void append(jsg::Lock& js, kj::String name, kj::String value); + void delete_(kj::String name); + + kj::Maybe getUnguarded(jsg::Lock& js, kj::StringPtr name); + void setUnguarded(jsg::Lock& js, kj::String name, kj::String value); + void appendUnguarded(jsg::Lock& js, kj::String name, kj::String value); + + kj::Maybe getCommon(jsg::Lock& js, capnp::CommonHeaderName idx); + bool hasCommon(capnp::CommonHeaderName idx); + void setCommon(capnp::CommonHeaderName idx, kj::String value); + void deleteCommon(capnp::CommonHeaderName idx); + + void forEach(jsg::Lock& js, + jsg::Function)>, + jsg::Optional); + + bool inspectImmutable(); + + JSG_ITERATOR(EntryIterator, entries, + kj::Array, + IteratorState, + entryIteratorNext) + JSG_ITERATOR(KeyIterator, keys, + kj::String, + IteratorState, + keyOrValueIteratorNext) + JSG_ITERATOR(ValueIterator, values, + kj::String, + IteratorState, + keyOrValueIteratorNext) + + // JavaScript API. + + JSG_RESOURCE_TYPE(Headers, CompatibilityFlags::Reader flags) { + JSG_METHOD(get); + JSG_METHOD(getAll); + if (flags.getHttpHeadersGetSetCookie()) { + JSG_METHOD(getSetCookie); + } + JSG_METHOD(has); + JSG_METHOD(set); + JSG_METHOD(append); + JSG_METHOD_NAMED(delete, delete_); + JSG_METHOD(forEach); + JSG_METHOD(entries); + JSG_METHOD(keys); + JSG_METHOD(values); + + JSG_INSPECT_PROPERTY(immutable, inspectImmutable); + + JSG_ITERABLE(entries); + + JSG_TS_DEFINE(type HeadersInit = Headers | Iterable> | Record); + // All type aliases get inlined when exporting RTTI, but this type alias is included by + // the official TypeScript types, so users might be depending on it. 
+ + JSG_TS_OVERRIDE({ + constructor(init?: HeadersInit); + + entries(): IterableIterator<[key: string, value: string]>; + [Symbol.iterator](): IterableIterator<[key: string, value: string]>; + + forEach(callback: (this: This, value: string, key: string, parent: Headers) => void, thisArg?: This): void; + }); + } + + void serialize(jsg::Lock& js, jsg::Serializer& serializer); + static jsg::Ref deserialize( + jsg::Lock& js, rpc::SerializationTag tag, jsg::Deserializer& deserializer); + + JSG_SERIALIZABLE(rpc::SerializationTag::HEADERS); + + void visitForMemoryInfo(jsg::MemoryTracker& tracker) const; + + // A header is identified by either a common header ID or an uncommon header name. + // The header key name is always identified in lower-case form, while the original + // casing is preserved in the actual Header struct to support case-preserving display. + // TODO(perf): We can likely optimize this further by interning uncommon header names + // so that we avoid repeated allocations of the same uncommon header name. Unless + // it proves to be a performance problem, however, we can leave that for future work. + using HeaderKey = kj::OneOf; + +private: + struct Header final { + // The header key, either a common header ID or an uncommon header name. + HeaderKey key; + // If the casing of the header name does not match the lower-cased version, we + // store the original casing here for display purposes. If the casing matches, this + // remains unset to avoid redundant allocation. + kj::Maybe name; + + // We intentionally do not comma-concatenate header values of the same name, as we need to be + // able to re-serialize them separately. This is particularly important for the Set-Cookie + // header, which uses a date format that requires a comma. This would normally suggest using a + // std::multimap, but we also need to be able to display the values in comma-concatenated form + // via Headers.entries()[1] in order to be Fetch-conformant. 
Storing a vector of strings in a + // std::map makes this easier, and also makes it easy to honor the "first header name casing is + // used for all duplicate header names" rule[2] that the Fetch spec mandates. + // + // See: 1: https://fetch.spec.whatwg.org/#concept-header-list-sort-and-combine + // 2: https://fetch.spec.whatwg.org/#concept-header-list-append + kj::Vector values; + + // Returns the lower-cased key name of the header. + kj::StringPtr getKeyName() const; + + // If the casing of the header name matches the lower-cased version, this + // returns the key name, otherwise it returns the preserved-casing name. + kj::StringPtr getHeaderName() const; + + explicit Header(HeaderKey key, kj::Maybe name = kj::none, + kj::Vector values = kj::Vector(1)); + + Header clone() const; + + JSG_MEMORY_INFO(Header) { + tracker.trackField("key", key.tryGet()); + tracker.trackField("name", name); + for (const auto& value : values) { + tracker.trackField(nullptr, value); + } + } + }; + + struct HeaderCallbacks final { + inline static const HeaderKey& keyForRow(const Header& header) { return header.key; } + inline static HeaderKey& keyForRow(Header& header) { return header.key; } + static bool matches(Header& header, const HeaderKey& other); + static bool matches(Header& header, kj::StringPtr otherName); + static bool matches(Header& header, capnp::CommonHeaderName commondId); + static kj::uint hashCode(const HeaderKey& key); + static kj::uint hashCode(capnp::CommonHeaderName commondId); + }; + + kj::Table> headers; + + Guard guard; + + void checkGuard() { + JSG_REQUIRE(guard == Guard::NONE, TypeError, "Can't modify immutable headers."); + } + + static kj::Maybe> entryIteratorNext(jsg::Lock& js, auto& state) { + if (state.cursor == state.copy.end()) { + return kj::none; + } + auto& ret = *state.cursor++; + return kj::arr(kj::mv(ret.key), kj::mv(ret.value)); + } + + static kj::Maybe keyOrValueIteratorNext(jsg::Lock& js, auto& state) { + if (state.cursor == state.copy.end()) { 
+ return kj::none; + } + auto& ret = *state.cursor++; + return kj::mv(ret); + } +}; + +} // namespace workerd::api diff --git a/src/workerd/api/html-rewriter.c++ b/src/workerd/api/html-rewriter.c++ index 87609eaca8d..71b03b35d08 100644 --- a/src/workerd/api/html-rewriter.c++ +++ b/src/workerd/api/html-rewriter.c++ @@ -1245,7 +1245,8 @@ jsg::Ref HTMLRewriter::transform(jsg::Lock& js, jsg::Ref res kj::String ownContentType; kj::String encoding = kj::str("utf-8"); - KJ_IF_SOME(contentType, response->getHeaders(js)->getNoChecks(js, "content-type"_kj)) { + KJ_IF_SOME(contentType, + response->getHeaders(js)->getCommon(js, capnp::CommonHeaderName::CONTENT_TYPE)) { // TODO(cleanup): readContentTypeParameter can be replaced with using // workerd/util/mimetype.h directly. KJ_IF_SOME(charset, readContentTypeParameter(contentType, "charset")) { diff --git a/src/workerd/api/http.c++ b/src/workerd/api/http.c++ index 9ba47a52418..7ac1edf2442 100644 --- a/src/workerd/api/http.c++ +++ b/src/workerd/api/http.c++ @@ -5,6 +5,7 @@ #include "http.h" #include "data-url.h" +#include "headers.h" #include "queue.h" #include "sockets.h" #include "system-streams.h" @@ -29,97 +30,9 @@ #include #include -#include - namespace workerd::api { namespace { - -void warnIfBadHeaderString(const jsg::ByteString& byteString) { - if (IoContext::hasCurrent()) { - auto& context = IoContext::current(); - if (context.isInspectorEnabled()) { - if (byteString.warning == jsg::ByteString::Warning::CONTAINS_EXTENDED_ASCII) { - // We're in a bit of a pickle: the script author is using our API correctly, but we're doing - // the wrong thing by UTF-8-encoding their bytes. To help the author understand the issue, - // we can show the string that they would be putting in the header if we implemented the - // spec correctly, and the string that is actually going get serialized onto the wire. 
- auto rawHex = kj::strArray(KJ_MAP(b, fastEncodeUtf16(byteString.asArray())) { - KJ_ASSERT(b < 256); // Guaranteed by StringWrapper having set CONTAINS_EXTENDED_ASCII. - return kj::str("\\x", kj::hex(kj::byte(b))); - }, ""); - auto utf8Hex = - kj::strArray( - KJ_MAP(b, byteString) { return kj::str("\\x", kj::hex(kj::byte(b))); }, ""); - - context.logWarning(kj::str("Problematic header name or value: \"", byteString, - "\" (raw bytes: \"", rawHex, - "\"). " - "This string contains 8-bit characters in the range 0x80 - 0xFF. As a quirk to support " - "Unicode, we encode header strings in UTF-8, meaning the actual header name/value on " - "the wire will be \"", - utf8Hex, - "\". Consider encoding this string in ASCII for " - "compatibility with browser implementations of the Fetch specifications.")); - } else if (byteString.warning == jsg::ByteString::Warning::CONTAINS_UNICODE) { - context.logWarning(kj::str("Invalid header name or value: \"", byteString, - "\". Per the Fetch specification, the " - "Headers class may only accept header names and values which contain 8-bit characters. " - "That is, they must not contain any Unicode code points greater than 0xFF. As a quirk, " - "we are encoding this string in UTF-8 in the header, but in a browser this would " - "result in a TypeError exception. Consider encoding this string in ASCII for " - "compatibility with browser implementations of the Fetch specification.")); - } - } - } -} - -// Left- and right-trim HTTP whitespace from `value`. 
-jsg::ByteString normalizeHeaderValue(jsg::Lock& js, jsg::ByteString value) { - warnIfBadHeaderString(value); - - kj::ArrayPtr slice = value; - auto isHttpWhitespace = [](char c) { return c == '\t' || c == '\r' || c == '\n' || c == ' '; }; - while (slice.size() > 0 && isHttpWhitespace(slice.front())) { - slice = slice.slice(1, slice.size()); - } - while (slice.size() > 0 && isHttpWhitespace(slice.back())) { - slice = slice.first(slice.size() - 1); - } - if (slice.size() == value.size()) { - return kj::mv(value); - } - return jsg::ByteString(kj::str(slice)); -} - -void requireValidHeaderName(const jsg::ByteString& name) { - // TODO(cleanup): Code duplication with kj/compat/http.c++ - - warnIfBadHeaderString(name); - - constexpr auto HTTP_SEPARATOR_CHARS = kj::parse::anyOfChars("()<>@,;:\\\"/[]?={} \t"); - // RFC2616 section 2.2: https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 - - constexpr auto HTTP_TOKEN_CHARS = kj::parse::controlChar.orChar('\x7f') - .orGroup(kj::parse::whitespaceChar) - .orGroup(HTTP_SEPARATOR_CHARS) - .invert(); - // RFC2616 section 2.2: https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 - // RFC2616 section 4.2: https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 - - for (char c: name) { - JSG_REQUIRE(HTTP_TOKEN_CHARS.contains(c), TypeError, "Invalid header name."); - } -} - -void requireValidHeaderValue(kj::StringPtr value) { - // TODO(cleanup): Code duplication with kj/compat/http.c++ - - for (char c: value) { - JSG_REQUIRE(c != '\0' && c != '\r' && c != '\n', TypeError, "Invalid header value."); - } -} - Request::CacheMode getCacheModeFromName(kj::StringPtr value) { if (value == "no-store") return Request::CacheMode::NOSTORE; if (value == "no-cache") return Request::CacheMode::NOCACHE; @@ -143,306 +56,6 @@ jsg::Optional getCacheModeName(Request::CacheMode mode) { } // namespace -Headers::Headers(jsg::Lock& js, jsg::Dict dict) - : guard(Guard::NONE) { - for (auto& field: dict.fields) { - append(js, 
kj::mv(field.name), kj::mv(field.value)); - } -} - -Headers::Headers(jsg::Lock& js, const Headers& other): guard(Guard::NONE) { - for (auto& header: other.headers) { - Header copy{ - jsg::ByteString(kj::str(header.second.key)), - jsg::ByteString(kj::str(header.second.name)), - KJ_MAP(value, header.second.values) { return jsg::ByteString(kj::str(value)); }, - }; - kj::StringPtr keyRef = copy.key; - KJ_ASSERT(headers.insert(std::make_pair(keyRef, kj::mv(copy))).second); - } -} - -Headers::Headers(jsg::Lock& js, const kj::HttpHeaders& other, Guard guard): guard(Guard::NONE) { - other.forEach([this, &js](auto name, auto value) { - append(js, jsg::ByteString(kj::str(name)), jsg::ByteString(kj::str(value))); - }); - - this->guard = guard; -} - -jsg::Ref Headers::clone(jsg::Lock& js) const { - auto result = js.alloc(js, *this); - result->guard = guard; - return kj::mv(result); -} - -// Fill in the given HttpHeaders with these headers. Note that strings are inserted by -// reference, so the output must be consumed immediately. -void Headers::shallowCopyTo(kj::HttpHeaders& out) { - for (auto& entry: headers) { - for (auto& value: entry.second.values) { - out.addPtrPtr(entry.second.name, value); - } - } -} - -bool Headers::hasLowerCase(kj::StringPtr name) { -#ifdef KJ_DEBUG - for (auto c: name) { - KJ_DREQUIRE(!('A' <= c && c <= 'Z')); - } -#endif - return headers.contains(name); -} - -kj::Array Headers::getDisplayedHeaders(jsg::Lock& js) { - if (FeatureFlags::get(js).getHttpHeadersGetSetCookie()) { - kj::Vector copy; - for (auto& entry: headers) { - if (entry.first == "set-cookie") { - // For set-cookie entries, we iterate each individually without - // combining them. 
- for (auto& value: entry.second.values) { - copy.add(Headers::DisplayedHeader{ - .key = jsg::ByteString(kj::str(entry.first)), - .value = jsg::ByteString(kj::str(value)), - }); - } - } else { - copy.add(Headers::DisplayedHeader{.key = jsg::ByteString(kj::str(entry.first)), - .value = jsg::ByteString(kj::strArray(entry.second.values, ", "))}); - } - } - return copy.releaseAsArray(); - } else { - // The old behavior before the standard getSetCookie() API was introduced... - auto headersCopy = KJ_MAP(mapEntry, headers) { - const auto& header = mapEntry.second; - return DisplayedHeader{ - jsg::ByteString(kj::str(header.key)), jsg::ByteString(kj::strArray(header.values, ", "))}; - }; - return headersCopy; - } -} - -jsg::Ref Headers::constructor(jsg::Lock& js, jsg::Optional init) { - using StringDict = jsg::Dict; - - KJ_IF_SOME(i, init) { - KJ_SWITCH_ONEOF(kj::mv(i)) { - KJ_CASE_ONEOF(dict, StringDict) { - return js.alloc(js, kj::mv(dict)); - } - KJ_CASE_ONEOF(headers, jsg::Ref) { - return js.alloc(js, *headers); - // It's important to note here that we are treating the Headers object - // as a special case here. Per the fetch spec, we *should* be grabbing - // the Symbol.iterator off the Headers object and interpreting it as - // a Sequence> (as in the ByteStringPairs case - // below). However, special casing Headers like we do here is more - // performant and has other side effects such as preserving the casing - // of header names that have been received. - // - // This does mean that we fail one of the more pathological (and kind - // of weird) Web Platform Tests for this API: - // - // const h = new Headers(); - // h[Symbol.iterator] = function * () { yield ["test", "test"]; }; - // const headers = new Headers(h); - // console.log(headers.has("test")); - // - // The spec would say headers.has("test") here should be true. With our - // implementation here, however, we are ignoring the Symbol.iterator so - // the test fails. 
- } - KJ_CASE_ONEOF(pairs, ByteStringPairs) { - auto dict = KJ_MAP(entry, pairs) { - JSG_REQUIRE(entry.size() == 2, TypeError, - "To initialize a Headers object from a sequence, each inner sequence " - "must have exactly two elements."); - return StringDict::Field{kj::mv(entry[0]), kj::mv(entry[1])}; - }; - return js.alloc(js, StringDict{kj::mv(dict)}); - } - } - } - - return js.alloc(); -} - -kj::Maybe Headers::get(jsg::Lock& js, jsg::ByteString name) { - requireValidHeaderName(name); - return getNoChecks(js, name.asPtr()); -} - -kj::Maybe Headers::getNoChecks(jsg::Lock& js, kj::StringPtr name) { - auto iter = headers.find(toLower(name)); - if (iter == headers.end()) { - return kj::none; - } else { - return jsg::ByteString(kj::strArray(iter->second.values, ", ")); - } -} - -kj::ArrayPtr Headers::getSetCookie() { - auto iter = headers.find("set-cookie"); - if (iter == headers.end()) { - return nullptr; - } else { - return iter->second.values.asPtr(); - } -} - -kj::ArrayPtr Headers::getAll(jsg::ByteString name) { - requireValidHeaderName(name); - - if (strcasecmp(name.cStr(), "set-cookie") != 0) { - JSG_FAIL_REQUIRE(TypeError, "getAll() can only be used with the header name \"Set-Cookie\"."); - } - - // getSetCookie() is the standard API here. getAll(...) is our legacy non-standard extension - // for the same use case. We continue to support getAll for backwards compatibility but moving - // forward users really should be using getSetCookie. 
- return getSetCookie(); -} - -bool Headers::has(jsg::ByteString name) { - requireValidHeaderName(name); - return headers.contains(toLower(kj::mv(name))); -} - -void Headers::set(jsg::Lock& js, jsg::ByteString name, jsg::ByteString value) { - checkGuard(); - requireValidHeaderName(name); - value = normalizeHeaderValue(js, kj::mv(value)); - requireValidHeaderValue(value); - setUnguarded(js, kj::mv(name), kj::mv(value)); -} - -void Headers::setUnguarded(jsg::Lock& js, jsg::ByteString name, jsg::ByteString value) { - // The variation of toLower we use here creates a copy. - auto key = jsg::ByteString(toLower(name)); - auto [iter, emplaced] = headers.try_emplace(key, kj::mv(key), kj::mv(name), kj::mv(value)); - if (!emplaced) { - // Overwrite existing value(s). - iter->second.values.clear(); - iter->second.values.add(kj::mv(value)); - } -} - -void Headers::append(jsg::Lock& js, jsg::ByteString name, jsg::ByteString value) { - checkGuard(); - requireValidHeaderName(name); - // The variation of toLower we use here creates a copy. - auto key = jsg::ByteString(toLower(name)); - value = normalizeHeaderValue(js, kj::mv(value)); - requireValidHeaderValue(value); - auto [iter, emplaced] = headers.try_emplace(key, kj::mv(key), kj::mv(name), kj::mv(value)); - if (!emplaced) { - iter->second.values.add(kj::mv(value)); - } -} - -void Headers::delete_(jsg::ByteString name) { - checkGuard(); - requireValidHeaderName(name); - headers.erase(toLower(kj::mv(name))); -} - -// There are a couple implementation details of the Headers iterators worth calling out. -// -// 1. Each iterator gets its own copy of the keys and/or values of the headers. While nauseating -// from a performance perspective, this solves both the iterator -> iterable lifetime dependence -// and the iterator invalidation issue: i.e., it's impossible for a user to unsafely modify the -// Headers data structure while iterating over it, because they are simply two separate data -// structures. 
By empirical testing, this seems to be how Chrome implements Headers iteration. -// -// Other alternatives bring their own pitfalls. We could store a Ref of the parent Headers -// object, solving the lifetime issue. To solve the iterator invalidation issue, we could store a -// copy of the currently-iterated-over key and use std::upper_bound() to find the next entry -// every time we want to increment the iterator (making the increment operation O(lg n) rather -// than O(1)); or we could make each Header entry in the map store a set of back-pointers to all -// live iterators pointing to it, with delete_() incrementing all iterators in the set whenever -// it deletes a header entry. Neither hack appealed to me. -// -// 2. Notice that the next() member function of the iterator classes moves the string(s) they -// contain, rather than making a copy of them as in the FormData iterators. This is safe to do -// because, unlike FormData, these iterators have their own copies of the strings, and since they -// are forward-only iterators, we know we won't need the strings again. -// -// TODO(perf): On point 1, perhaps we could avoid most copies by using a copy-on-write strategy -// applied to the header map elements? We'd still copy the whole data structure to avoid iterator -// invalidation, but the elements would be cheaper to copy. - -jsg::Ref Headers::entries(jsg::Lock& js) { - return js.alloc(IteratorState{getDisplayedHeaders(js)}); -} -jsg::Ref Headers::keys(jsg::Lock& js) { - if (FeatureFlags::get(js).getHttpHeadersGetSetCookie()) { - kj::Vector keysCopy; - for (auto& entry: headers) { - // Set-Cookie headers must be handled specially. They should never be combined into a - // single value, so the values iterator must separate them. It seems a bit silly, but - // the keys iterator can end up having multiple set-cookie instances. 
- if (entry.first == "set-cookie") { - for (auto n = 0; n < entry.second.values.size(); n++) { - keysCopy.add(jsg::ByteString(kj::str(entry.first))); - } - } else { - keysCopy.add(jsg::ByteString(kj::str(entry.first))); - } - } - return js.alloc(IteratorState{keysCopy.releaseAsArray()}); - } else { - auto keysCopy = - KJ_MAP(mapEntry, headers) { return jsg::ByteString(kj::str(mapEntry.second.key)); }; - return js.alloc(IteratorState{kj::mv(keysCopy)}); - } -} -jsg::Ref Headers::values(jsg::Lock& js) { - if (FeatureFlags::get(js).getHttpHeadersGetSetCookie()) { - kj::Vector values; - for (auto& entry: headers) { - // Set-Cookie headers must be handled specially. They should never be combined into a - // single value, so the values iterator must separate them. - if (entry.first == "set-cookie") { - for (auto& value: entry.second.values) { - values.add(jsg::ByteString(kj::str(value))); - } - } else { - values.add(jsg::ByteString(kj::strArray(entry.second.values, ", "))); - } - } - return js.alloc(IteratorState{values.releaseAsArray()}); - } else { - auto valuesCopy = KJ_MAP(mapEntry, headers) { - return jsg::ByteString(kj::strArray(mapEntry.second.values, ", ")); - }; - return js.alloc(IteratorState{kj::mv(valuesCopy)}); - } -} - -void Headers::forEach(jsg::Lock& js, - jsg::Function)> callback, - jsg::Optional thisArg) { - auto receiver = js.v8Undefined(); - KJ_IF_SOME(arg, thisArg) { - auto handle = arg.getHandle(js); - if (!handle->IsNullOrUndefined()) { - receiver = handle; - } - } - callback.setReceiver(js.v8Ref(receiver)); - - for (auto& entry: getDisplayedHeaders(js)) { - callback(js, entry.value, entry.key, JSG_THIS); - } -} - -bool Headers::inspectImmutable() { - return guard != Guard::NONE; -} - // ----------------------------------------------------------------------------- // serialization of headers // @@ -457,141 +70,6 @@ bool Headers::inspectImmutable() { // capitalization). 
So, it's certainly not worth it to try to keep the original capitalization // across serialization. -// If any more headers are added to the CommonHeaderName enum later, we should be careful about -// introducing them into serialization. We need to roll out a change that recognizes the new IDs -// before rolling out a change that sends them. MAX_COMMON_HEADER_ID is the max value we're willing -// to send. -static constexpr uint MAX_COMMON_HEADER_ID = - static_cast(capnp::CommonHeaderName::WWW_AUTHENTICATE); - -// ID for the `$commonText` annotation declared in http-over-capnp.capnp. -// TODO(cleanup): Cap'n Proto should really codegen constants for annotation IDs so we don't have -// to copy them. -static constexpr uint64_t COMMON_TEXT_ANNOTATION_ID = 0x857745131db6fc83; - -static kj::Array makeCommonHeaderList() { - auto enums = capnp::Schema::from().getEnumerants(); - auto builder = kj::heapArrayBuilder(enums.size()); - bool first = true; - for (auto e: enums) { - if (first) { - // Value zero is invalid, skip it. - static_assert(static_cast(capnp::CommonHeaderName::INVALID) == 0); - - // Add `nullptr` to the array so that our array indexes aren't off-by-one from the enum - // values. We could in theory skip this and use +1 and -1 in a bunch of places but that seems - // error-prone. - builder.add(nullptr); - - first = false; - continue; - } - - kj::Maybe name; - - // Look for $commonText annotation. 
- for (auto ann: e.getProto().getAnnotations()) { - if (ann.getId() == COMMON_TEXT_ANNOTATION_ID) { - name = ann.getValue().getText(); - break; - } - } - - builder.add(KJ_ASSERT_NONNULL(name)); - } - - return builder.finish(); -} - -static kj::ArrayPtr getCommonHeaderList() { - static const kj::Array LIST = makeCommonHeaderList(); - return LIST; -} - -static kj::HashMap makeCommonHeaderMap() { - kj::HashMap result; - auto list = getCommonHeaderList(); - KJ_ASSERT(MAX_COMMON_HEADER_ID < list.size()); - for (auto i: kj::range(1, MAX_COMMON_HEADER_ID + 1)) { - auto key = kj::str(list[i]); - for (auto& c: key) { - if ('A' <= c && c <= 'Z') { - c = c - 'A' + 'a'; - } - } - result.insert(kj::mv(key), i); - } - return result; -} - -static const kj::HashMap& getCommonHeaderMap() { - static const kj::HashMap MAP = makeCommonHeaderMap(); - return MAP; -} - -void Headers::serialize(jsg::Lock& js, jsg::Serializer& serializer) { - // We serialize as a series of key-value pairs. Each value is a length-delimited string. Each key - // is a common header ID, or the value zero to indicate an uncommon header, which is then - // followed by a length-delimited name. - - serializer.writeRawUint32(static_cast(guard)); - - // Write the count of headers. - uint count = 0; - for (auto& entry: headers) { - count += entry.second.values.size(); - } - serializer.writeRawUint32(count); - - // Now write key/values. 
- auto& commonHeaders = getCommonHeaderMap(); - for (auto& entry: headers) { - auto& header = entry.second; - auto commonId = commonHeaders.find(header.key); - for (auto& value: header.values) { - KJ_IF_SOME(c, commonId) { - serializer.writeRawUint32(c); - } else { - serializer.writeRawUint32(0); - serializer.writeLengthDelimited(header.name); - } - serializer.writeLengthDelimited(value); - } - } -} - -jsg::Ref Headers::deserialize( - jsg::Lock& js, rpc::SerializationTag tag, jsg::Deserializer& deserializer) { - auto result = js.alloc(); - uint guard = deserializer.readRawUint32(); - KJ_REQUIRE(guard <= static_cast(Guard::NONE), "unknown guard value"); - - uint count = deserializer.readRawUint32(); - - auto commonHeaders = getCommonHeaderList(); - for (auto i KJ_UNUSED: kj::zeroTo(count)) { - uint commonId = deserializer.readRawUint32(); - kj::String name; - if (commonId == 0) { - name = deserializer.readLengthDelimitedString(); - } else { - KJ_ASSERT(commonId < commonHeaders.size()); - name = kj::str(commonHeaders[commonId]); - } - - auto value = deserializer.readLengthDelimitedString(); - - result->append(js, jsg::ByteString(kj::mv(name)), jsg::ByteString(kj::mv(value))); - } - - // Don't actually set the guard until here because it may block the ability to call `append()`. 
- result->guard = static_cast(guard); - - return result; -} - -// ======================================================================================= - namespace { class BodyBufferInputStream final: public ReadableStreamSource { @@ -732,12 +210,12 @@ Body::ExtractedBody Body::extractBody(jsg::Lock& js, Initializer init) { } Body::Body(jsg::Lock& js, kj::Maybe init, Headers& headers) - : impl(kj::mv(init).map([&headers, &js](auto i) -> Impl { + : impl(kj::mv(init).map([&headers](auto i) -> Impl { KJ_IF_SOME(ct, i.contentType) { - if (!headers.hasLowerCase("content-type")) { + if (!headers.hasCommon(capnp::CommonHeaderName::CONTENT_TYPE)) { // The spec allows the user to override the Content-Type, if they wish, so we only set // the Content-Type if it doesn't already exist. - headers.set(js, jsg::ByteString(kj::str("Content-Type")), jsg::ByteString(kj::mv(ct))); + headers.setCommon(capnp::CommonHeaderName::CONTENT_TYPE, kj::mv(ct)); } else if (MimeType::FORM_DATA == ct) { // Custom content-type request/responses with FormData are broken since they require a // boundary parameter only the FormData serializer can provide. Let's warn if a dev does this. @@ -830,7 +308,7 @@ jsg::Promise Body::text(jsg::Lock& js) { // When running in the fiddle, let's warn the developer if they do this. auto& context = IoContext::current(); if (context.isInspectorEnabled()) { - KJ_IF_SOME(type, headersRef.getNoChecks(js, "Content-Type"_kj)) { + KJ_IF_SOME(type, headersRef.getCommon(js, capnp::CommonHeaderName::CONTENT_TYPE)) { maybeWarnIfNotText(js, type); } } @@ -853,8 +331,9 @@ jsg::Promise> Body::formData(jsg::Lock& js) { "Body has already been used. " "It can only be used once. 
Use tee() first if you need to read it twice."); - auto contentType = JSG_REQUIRE_NONNULL(headersRef.getNoChecks(js, "Content-Type"_kj), TypeError, - "Parsing a Body as FormData requires a Content-Type header."); + auto contentType = + JSG_REQUIRE_NONNULL(headersRef.getCommon(js, capnp::CommonHeaderName::CONTENT_TYPE), + TypeError, "Parsing a Body as FormData requires a Content-Type header."); KJ_IF_SOME(i, impl) { KJ_ASSERT(!i.stream->isDisturbed()); @@ -885,8 +364,8 @@ jsg::Promise Body::json(jsg::Lock& js) { jsg::Promise> Body::blob(jsg::Lock& js) { return arrayBuffer(js).then(js, [this](jsg::Lock& js, jsg::BufferSource buffer) { - kj::String contentType = headersRef.getNoChecks(js, "Content-Type"_kj) - .map([](jsg::ByteString&& b) -> kj::String { + kj::String contentType = headersRef.getCommon(js, capnp::CommonHeaderName::CONTENT_TYPE) + .map([](auto&& b) -> kj::String { return kj::mv(b); }).orDefault(nullptr); @@ -1563,9 +1042,8 @@ jsg::Ref Response::json_( jsg::Lock& js, jsg::JsValue any, jsg::Optional maybeInit) { const auto maybeSetContentType = [](jsg::Lock& js, auto headers) { - if (!headers->hasLowerCase("content-type"_kj)) { - headers->setUnguarded(js, jsg::ByteString(kj::str("content-type")), - jsg::ByteString(MimeType::JSON.toString())); + if (!headers->hasCommon(capnp::CommonHeaderName::CONTENT_TYPE)) { + headers->setCommon(capnp::CommonHeaderName::CONTENT_TYPE, MimeType::JSON.toString()); } return kj::mv(headers); }; @@ -2221,7 +1699,7 @@ jsg::Promise> handleHttpRedirectResponse(jsg::Lock& js, // // (NB: "CORS non-wildcard request-header name" consists solely of "Authorization") - jsRequest->getHeaders(js)->delete_(jsg::ByteString(kj::str("authorization"))); + jsRequest->getHeaders(js)->deleteCommon(capnp::CommonHeaderName::AUTHORIZATION); } } @@ -2371,8 +1849,7 @@ jsg::Promise> fetchImplNoOutputLock(jsg::Lock& js, } auto headers = js.alloc(); - headers->set(js, jsg::ByteString(kj::str("content-type"_kj)), - 
jsg::ByteString(dataUrl.getMimeType().toString())); + headers->setCommon(capnp::CommonHeaderName::CONTENT_TYPE, dataUrl.getMimeType().toString()); return js.resolvedPromise(Response::constructor(js, kj::mv(maybeResponseBody), Response::InitializerDict{ .status = 200, diff --git a/src/workerd/api/http.h b/src/workerd/api/http.h index 37cc65e18b2..9c8d0c89976 100644 --- a/src/workerd/api/http.h +++ b/src/workerd/api/http.h @@ -4,242 +4,31 @@ #pragma once -#include -#include -#include -#include #include "basics.h" +#include "blob.h" #include "cf-property.h" -#include #include "form-data.h" +#include "headers.h" +#include "queue.h" #include "web-socket.h" -#include -#include -#include "blob.h" -#include #include "worker-rpc.h" -#include "queue.h" - -namespace workerd::api { - -class Headers final: public jsg::Object { -private: - template - struct IteratorState { - kj::Array copy; - decltype(copy.begin()) cursor = copy.begin(); - }; - -public: - enum class Guard { - // WARNING: This type is serialized, do not change the numeric values. - IMMUTABLE = 0, - REQUEST = 1, - // REQUEST_NO_CORS, // CORS not relevant on server side - RESPONSE = 2, - NONE = 3 - }; - - struct DisplayedHeader { - jsg::ByteString key; // lower-cased name - jsg::ByteString value; // comma-concatenation of all values seen - }; - - Headers(): guard(Guard::NONE) {} - explicit Headers(jsg::Lock& js, jsg::Dict dict); - explicit Headers(jsg::Lock& js, const Headers& other); - explicit Headers(jsg::Lock& js, const kj::HttpHeaders& other, Guard guard); - - Headers(Headers&&) = delete; - Headers& operator=(Headers&&) = delete; - - // Make a copy of this Headers object, and preserve the guard. The normal copy constructor sets - // the copy's guard to NONE. - jsg::Ref clone(jsg::Lock& js) const; - - // Fill in the given HttpHeaders with these headers. Note that strings are inserted by - // reference, so the output must be consumed immediately. 
- void shallowCopyTo(kj::HttpHeaders& out); - - // Like has(), but only call this with an already-lower-case `name`. Useful to avoid an - // unnecessary string allocation. Not part of the JS interface. - bool hasLowerCase(kj::StringPtr name); - - // Returns headers with lower-case name and comma-concatenated duplicates. - kj::Array getDisplayedHeaders(jsg::Lock& js); - - using ByteStringPair = jsg::Sequence; - using ByteStringPairs = jsg::Sequence; - - // Per the fetch specification, it is possible to initialize a Headers object - // from any other object that has a Symbol.iterator implementation. Those are - // handled in this Initializer definition using the ByteStringPairs definition - // that aliases jsg::Sequence>. Technically, - // the Headers object itself falls under that definition as well. However, treating - // a Headers object as a jsg::Sequence> is nowhere near as - // performant and has the side effect of forcing all header names to be lower-cased - // rather than case-preserved. Instead of following the spec exactly here, we - // choose to special case creating a Header object from another Header object. - // This is an intentional departure from the spec. - using Initializer = kj::OneOf, - ByteStringPairs, - jsg::Dict>; - - static jsg::Ref constructor(jsg::Lock& js, jsg::Optional init); - kj::Maybe get(jsg::Lock& js, jsg::ByteString name); - - kj::Maybe getNoChecks(jsg::Lock& js, kj::StringPtr name); - - // getAll is a legacy non-standard extension API that we introduced before - // getSetCookie() was defined. We continue to support it for backwards - // compatibility but users really ought to be using getSetCookie() now. - kj::ArrayPtr getAll(jsg::ByteString name); - - // The Set-Cookie header is special in that it is the only HTTP header that - // is not permitted to be combined into a single instance. 
- kj::ArrayPtr getSetCookie(); - - bool has(jsg::ByteString name); - - void set(jsg::Lock& js, jsg::ByteString name, jsg::ByteString value); - - // Like set(), but ignores the header guard if set. This can only be called from C++, and may be - // used to mutate headers before dispatching a request. - void setUnguarded(jsg::Lock& js, jsg::ByteString name, jsg::ByteString value); - - void append(jsg::Lock& js, jsg::ByteString name, jsg::ByteString value); - - void delete_(jsg::ByteString name); - - void forEach(jsg::Lock& js, - jsg::Function)>, - jsg::Optional); - - bool inspectImmutable(); - - JSG_ITERATOR(EntryIterator, entries, - kj::Array, - IteratorState, - entryIteratorNext) - JSG_ITERATOR(KeyIterator, keys, - jsg::ByteString, - IteratorState, - keyOrValueIteratorNext) - JSG_ITERATOR(ValueIterator, values, - jsg::ByteString, - IteratorState, - keyOrValueIteratorNext) - - // JavaScript API. - - JSG_RESOURCE_TYPE(Headers, CompatibilityFlags::Reader flags) { - JSG_METHOD(get); - JSG_METHOD(getAll); - if (flags.getHttpHeadersGetSetCookie()) { - JSG_METHOD(getSetCookie); - } - JSG_METHOD(has); - JSG_METHOD(set); - JSG_METHOD(append); - JSG_METHOD_NAMED(delete, delete_); - JSG_METHOD(forEach); - JSG_METHOD(entries); - JSG_METHOD(keys); - JSG_METHOD(values); - - JSG_INSPECT_PROPERTY(immutable, inspectImmutable); - - JSG_ITERABLE(entries); - - JSG_TS_DEFINE(type HeadersInit = Headers | Iterable> | Record); - // All type aliases get inlined when exporting RTTI, but this type alias is included by - // the official TypeScript types, so users might be depending on it. 
- - JSG_TS_OVERRIDE({ - constructor(init?: HeadersInit); - - entries(): IterableIterator<[key: string, value: string]>; - [Symbol.iterator](): IterableIterator<[key: string, value: string]>; - - forEach(callback: (this: This, value: string, key: string, parent: Headers) => void, thisArg?: This): void; - }); - } - - void serialize(jsg::Lock& js, jsg::Serializer& serializer); - static jsg::Ref deserialize( - jsg::Lock& js, rpc::SerializationTag tag, jsg::Deserializer& deserializer); - - JSG_SERIALIZABLE(rpc::SerializationTag::HEADERS); - - void visitForMemoryInfo(jsg::MemoryTracker& tracker) const { - for (const auto& entry : headers) { - tracker.trackField(entry.first, entry.second); - } - } - -private: - struct Header { - jsg::ByteString key; // lower-cased name - jsg::ByteString name; - - // We intentionally do not comma-concatenate header values of the same name, as we need to be - // able to re-serialize them separately. This is particularly important for the Set-Cookie - // header, which uses a date format that requires a comma. This would normally suggest using a - // std::multimap, but we also need to be able to display the values in comma-concatenated form - // via Headers.entries()[1] in order to be Fetch-conformant. Storing a vector of strings in a - // std::map makes this easier, and also makes it easy to honor the "first header name casing is - // used for all duplicate header names" rule[2] that the Fetch spec mandates. 
- // - // See: 1: https://fetch.spec.whatwg.org/#concept-header-list-sort-and-combine - // 2: https://fetch.spec.whatwg.org/#concept-header-list-append - kj::Vector values; - - explicit Header(jsg::ByteString key, jsg::ByteString name, - kj::Vector values) - : key(kj::mv(key)), name(kj::mv(name)), values(kj::mv(values)) {} - explicit Header(jsg::ByteString key, jsg::ByteString name, jsg::ByteString value) - : key(kj::mv(key)), name(kj::mv(name)), values(1) { - values.add(kj::mv(value)); - } - - JSG_MEMORY_INFO(Header) { - tracker.trackField("key", key); - tracker.trackField("name", name); - for (const auto& value : values) { - tracker.trackField(nullptr, value); - } - } - }; - Guard guard; - std::map headers; - - void checkGuard() { - JSG_REQUIRE(guard == Guard::NONE, TypeError, "Can't modify immutable headers."); - } +#include +#include +#include +#include +#include +#include - static kj::Maybe> entryIteratorNext(jsg::Lock& js, auto& state) { - if (state.cursor == state.copy.end()) { - return kj::none; - } - auto& ret = *state.cursor++; - return kj::arr(kj::mv(ret.key), kj::mv(ret.value)); - } +#include - static kj::Maybe keyOrValueIteratorNext(jsg::Lock& js, auto& state) { - if (state.cursor == state.copy.end()) { - return kj::none; - } - auto& ret = *state.cursor++; - return kj::mv(ret); - } -}; +namespace workerd::api { // Base class for Request and Response. In JavaScript, this class is a mixin, meaning no one will // be instantiating objects of this type -- it exists solely to house body-related functionality // common to both Requests and Responses. class Body: public jsg::Object { -public: + public: // The types of objects from which a Body can be created. // // If the object is a ReadableStream, Body will adopt it directly; otherwise the object is some @@ -254,9 +43,13 @@ class Body: public jsg::Object { // will fail, because there is no body source left. 
On the other hand, if the body was constructed // from any of the other source types, Body can create a new ReadableStream from the source, and // the POST will successfully retransmit. - using Initializer = kj::OneOf, kj::String, kj::Array, - jsg::Ref, jsg::Ref, - jsg::Ref, jsg::Ref>; + using Initializer = kj::OneOf, + kj::String, + kj::Array, + jsg::Ref, + jsg::Ref, + jsg::Ref, + jsg::Ref>; struct RefcountedBytes final: public kj::Refcounted { kj::Array bytes; @@ -328,8 +121,8 @@ class Body: public jsg::Object { struct ExtractedBody { ExtractedBody(jsg::Ref stream, - kj::Maybe source = kj::none, - kj::Maybe contentType = kj::none); + kj::Maybe source = kj::none, + kj::Maybe contentType = kj::none); Impl impl; kj::Maybe contentType; @@ -397,11 +190,11 @@ class Body: public jsg::Object { tracker.trackField("impl", impl); } -protected: + protected: // Helper to implement Request/Response::clone(). kj::Maybe clone(jsg::Lock& js); -private: + private: kj::Maybe impl; // HACK: This `headersRef` variable refers to a Headers object in the Request/Response subclass. @@ -418,8 +211,8 @@ class Body: public jsg::Object { // Controls how response bodies are encoded/decoded according to Content-Encoding headers enum class Response_BodyEncoding { - AUTO, // Automatically encode/decode based on Content-Encoding headers - MANUAL // Treat Content-Encoding headers as opaque (no automatic encoding/decoding) + AUTO, // Automatically encode/decode based on Content-Encoding headers + MANUAL // Treat Content-Encoding headers as opaque (no automatic encoding/decoding) }; class Request; @@ -441,12 +234,9 @@ using AnySocketAddress = kj::OneOf; // TODO(cleanup): This probably doesn't belong in `http.h` anymore. And perhaps it should be // renamed, though I haven't heard any great suggestions for what the name should be. class Fetcher: public JsRpcClientProvider { -public: + public: // Should we use a fake https base url if we lack a scheme+authority? 
- enum class RequiresHostAndProtocol { - YES, - NO - }; + enum class RequiresHostAndProtocol { YES, NO }; // `channel` is what to pass to IoContext::getSubrequestChannel() to get a WorkerInterface // representing this Fetcher. Note that different requests potentially have different client @@ -459,7 +249,9 @@ class Fetcher: public JsRpcClientProvider { // // See pipeline.capnp or request-context.h for an explanation of `isInHouse`. explicit Fetcher(uint channel, RequiresHostAndProtocol requiresHost, bool isInHouse = false) - : channelOrClientFactory(channel), requiresHost(requiresHost), isInHouse(isInHouse) {} + : channelOrClientFactory(channel), + requiresHost(requiresHost), + isInHouse(isInHouse) {} // Create a Fetcher bound to an IoChannelFactory::SubrequestChannel object rather than a numeric // channel. This Fetcher will inherently be bound to the current I/O context. @@ -476,7 +268,7 @@ class Fetcher: public JsRpcClientProvider { // TODO(cleanup): Consider removing this in favor of `IoChannelFactory::SubrequestChannel`, which // is almost the same thing. class OutgoingFactory { - public: + public: virtual kj::Own newSingleUseClient(kj::Maybe cfStr) = 0; // Get a `SubrequestChannel` representing this Fetcher. This is used especially when the @@ -491,8 +283,9 @@ class Fetcher: public JsRpcClientProvider { // to a specific I/O context. The factory object moves with the isolate across threads and // contexts, and must work from any context. class CrossContextOutgoingFactory { - public: - virtual kj::Own newSingleUseClient(IoContext& context, kj::Maybe cfStr) = 0; + public: + virtual kj::Own newSingleUseClient( + IoContext& context, kj::Maybe cfStr) = 0; virtual kj::Own getSubrequestChannel(IoContext& context) { // TODO(soon): Update all implementations and remove this default implementation. 
@@ -503,8 +296,8 @@ class Fetcher: public JsRpcClientProvider { // `outgoingFactory` is used for Fetchers that use ad-hoc WorkerInterface instances, such as ones // created for Actors. Fetcher(IoOwn outgoingFactory, - RequiresHostAndProtocol requiresHost, - bool isInHouse = false) + RequiresHostAndProtocol requiresHost, + bool isInHouse = false) : channelOrClientFactory(kj::mv(outgoingFactory)), requiresHost(requiresHost), isInHouse(isInHouse) {} @@ -512,8 +305,8 @@ class Fetcher: public JsRpcClientProvider { // `outgoingFactory` is used for Fetchers that use ad-hoc WorkerInterface instances, but doesn't // require an IoContext Fetcher(kj::Own outgoingFactory, - RequiresHostAndProtocol requiresHost, - bool isInHouse = false) + RequiresHostAndProtocol requiresHost, + bool isInHouse = false) : channelOrClientFactory(kj::mv(outgoingFactory)), requiresHost(requiresHost), isInHouse(isInHouse) {} @@ -521,9 +314,7 @@ class Fetcher: public JsRpcClientProvider { // Returns an `WorkerInterface` that is only valid for the lifetime of the current // `IoContext`. kj::Own getClient( - IoContext& ioContext, - kj::Maybe cfStr, - kj::ConstString operationName); + IoContext& ioContext, kj::Maybe cfStr, kj::ConstString operationName); // Result of getClient call that includes optional trace context struct ClientWithTracing { @@ -533,9 +324,7 @@ class Fetcher: public JsRpcClientProvider { // Get client and optionally create trace context, all in one call ClientWithTracing getClientWithTracing( - IoContext& ioContext, - kj::Maybe cfStr, - kj::ConstString operationName); + IoContext& ioContext, kj::Maybe cfStr, kj::ConstString operationName); // Get a SubrequestChannel representing this Fetcher. 
kj::Own getSubrequestChannel(IoContext& ioContext); @@ -547,8 +336,8 @@ class Fetcher: public JsRpcClientProvider { jsg::Ref connect( jsg::Lock& js, AnySocketAddress address, jsg::Optional options); - jsg::Promise> fetch( - jsg::Lock& js, kj::OneOf, kj::String> requestOrUrl, + jsg::Promise> fetch(jsg::Lock& js, + kj::OneOf, kj::String> requestOrUrl, jsg::Optional>> requestInit); using GetResult = kj::OneOf, jsg::BufferSource, kj::String, jsg::Value>; @@ -598,8 +387,8 @@ class Fetcher: public JsRpcClientProvider { JSG_STRUCT(outcome, ackAll, retryBatch, explicitAcks, retryMessages); }; - jsg::Promise queue(jsg::Lock& js, kj::String queueName, - kj::Array messages); + jsg::Promise queue( + jsg::Lock& js, kj::String queueName, kj::Array messages); struct ScheduledOptions { jsg::Optional scheduledTime; @@ -797,10 +586,11 @@ struct RequestInitializerDict { // jsg::Optional priority; // TODO(conform): Might support later? - JSG_STRUCT(method, headers, body, redirect, fetcher, cf, cache, integrity, signal, encodeResponseBody); + JSG_STRUCT( + method, headers, body, redirect, fetcher, cf, cache, integrity, signal, encodeResponseBody); JSG_STRUCT_TS_OVERRIDE_DYNAMIC(CompatibilityFlags::Reader flags) { - if(flags.getCacheOptionEnabled()) { - if(flags.getCacheReload()) { + if (flags.getCacheOptionEnabled()) { + if (flags.getCacheReload()) { JSG_TS_OVERRIDE(RequestInit { headers?: HeadersInit; body?: BodyInit | null; @@ -809,7 +599,7 @@ struct RequestInitializerDict { encodeResponseBody?: "automatic" | "manual"; }); - } else if(flags.getCacheNoCache()) { + } else if (flags.getCacheNoCache()) { JSG_TS_OVERRIDE(RequestInit { headers?: HeadersInit; body?: BodyInit | null; @@ -843,7 +633,7 @@ struct RequestInitializerDict { }; class Request final: public Body { -public: + public: enum class Redirect { FOLLOW, MANUAL @@ -860,15 +650,27 @@ class Request final: public Body { RELOAD, }; - Request(jsg::Lock& js, kj::HttpMethod method, kj::StringPtr url, Redirect redirect, - jsg::Ref 
headers, kj::Maybe> fetcher, - kj::Maybe> signal, CfProperty&& cf, - kj::Maybe body, kj::Maybe> thisSignal, - CacheMode cacheMode = CacheMode::NONE, - Response_BodyEncoding responseBodyEncoding = Response_BodyEncoding::AUTO) - : Body(js, kj::mv(body), *headers), method(method), url(kj::str(url)), - redirect(redirect), headers(kj::mv(headers)), fetcher(kj::mv(fetcher)), - cacheMode(cacheMode), cf(kj::mv(cf)), responseBodyEncoding(responseBodyEncoding) { + Request(jsg::Lock& js, + kj::HttpMethod method, + kj::StringPtr url, + Redirect redirect, + jsg::Ref headers, + kj::Maybe> fetcher, + kj::Maybe> signal, + CfProperty&& cf, + kj::Maybe body, + kj::Maybe> thisSignal, + CacheMode cacheMode = CacheMode::NONE, + Response_BodyEncoding responseBodyEncoding = Response_BodyEncoding::AUTO) + : Body(js, kj::mv(body), *headers), + method(method), + url(kj::str(url)), + redirect(redirect), + headers(kj::mv(headers)), + fetcher(kj::mv(fetcher)), + cacheMode(cacheMode), + cf(kj::mv(cf)), + responseBodyEncoding(responseBodyEncoding) { KJ_IF_SOME(s, signal) { // If the AbortSignal will never abort, assigning it to thisSignal instead ensures // that the cancel machinery is not used but the request.signal accessor will still @@ -888,9 +690,15 @@ class Request final: public Body { // constructs like `new Request("")` should actually throw TypeError, but constructing Requests // with empty URLs is useful in testing. 
- kj::HttpMethod getMethodEnum() { return method; } - void setMethodEnum(kj::HttpMethod newMethod) { method = newMethod; } - Redirect getRedirectEnum() { return redirect; } + kj::HttpMethod getMethodEnum() { + return method; + } + void setMethodEnum(kj::HttpMethod newMethod) { + method = newMethod; + } + Redirect getRedirectEnum() { + return redirect; + } void shallowCopyHeadersTo(kj::HttpHeaders& out); kj::Maybe serializeCfBlobJson(jsg::Lock& js); @@ -907,14 +715,10 @@ class Request final: public Body { // // C++ API, but declared down here because we need the InitializerDict type. static jsg::Ref coerce( - jsg::Lock& js, - Request::Info input, - jsg::Optional init); + jsg::Lock& js, Request::Info input, jsg::Optional init); static jsg::Ref constructor( - jsg::Lock& js, - Request::Info input, - jsg::Optional init); + jsg::Lock& js, Request::Info input, jsg::Optional init); jsg::Ref clone(jsg::Lock& js); @@ -965,7 +769,9 @@ class Request final: public Body { // the standard property, hard-coded to always be false. WinterTC actually recommends that // this one just be left undefined but we already had this returning false always and it // would require a compat flag to remove. Just keep it as it's harmless. - bool getKeepalive() { return false; } + bool getKeepalive() { + return false; + } // The cache mode determines how HTTP cache is used with the request. jsg::Optional getCache(jsg::Lock& js); @@ -974,10 +780,14 @@ class Request final: public Body { // We do not implement integrity checking at all. However, the spec says that // the default value should be an empty string. When the Request object is // created we verify that the given value is undefined or empty. 
- kj::String getIntegrity() { return kj::String(); } + kj::String getIntegrity() { + return kj::String(); + } // Get the response body encoding setting for this request - Response_BodyEncoding getResponseBodyEncoding() { return responseBodyEncoding; } + Response_BodyEncoding getResponseBodyEncoding() { + return responseBodyEncoding; + } JSG_RESOURCE_TYPE(Request, CompatibilityFlags::Reader flags) { JSG_INHERIT(Body); @@ -1002,16 +812,16 @@ class Request final: public Body { // JSG_READONLY_PROTOTYPE_PROPERTY(duplex, getDuplex); JSG_READONLY_PROTOTYPE_PROPERTY(integrity, getIntegrity); JSG_READONLY_PROTOTYPE_PROPERTY(keepalive, getKeepalive); - if(flags.getCacheOptionEnabled()) { + if (flags.getCacheOptionEnabled()) { JSG_READONLY_PROTOTYPE_PROPERTY(cache, getCache); - if(flags.getCacheReload()) { + if (flags.getCacheReload()) { JSG_TS_OVERRIDE(> { constructor(input: RequestInfo | URL, init?: RequestInit); clone(): Request; cache?: "no-store" | "no-cache" | "reload"; get cf(): Cf | undefined; }); - } else if(flags.getCacheNoCache()) { + } else if (flags.getCacheNoCache()) { JSG_TS_OVERRIDE(> { constructor(input: RequestInfo | URL, init?: RequestInit); clone(): Request; @@ -1060,11 +870,12 @@ class Request final: public Body { } } - void serialize( - jsg::Lock& js, jsg::Serializer& serializer, + void serialize(jsg::Lock& js, + jsg::Serializer& serializer, const jsg::TypeHandler& initDictHandler); - static jsg::Ref deserialize( - jsg::Lock& js, rpc::SerializationTag tag, jsg::Deserializer& deserializer, + static jsg::Ref deserialize(jsg::Lock& js, + rpc::SerializationTag tag, + jsg::Deserializer& deserializer, const jsg::TypeHandler& initDictHandler); JSG_SERIALIZABLE(rpc::SerializationTag::REQUEST); @@ -1078,7 +889,7 @@ class Request final: public Body { tracker.trackField("cf", cf); } -private: + private: kj::HttpMethod method; kj::String url; Redirect redirect; @@ -1105,15 +916,19 @@ class Request final: public Body { }; class Response final: public Body { -public: 
+ public: // Alias to the global Response_BodyEncoding enum for backward compatibility using BodyEncoding = Response_BodyEncoding; - Response(jsg::Lock& js, int statusCode, kj::String statusText, jsg::Ref headers, - CfProperty&& cf, kj::Maybe body, - kj::Array urlList = {}, - kj::Maybe> webSocket = kj::none, - BodyEncoding bodyEncoding = BodyEncoding::AUTO); + Response(jsg::Lock& js, + int statusCode, + kj::String statusText, + jsg::Ref headers, + CfProperty&& cf, + kj::Maybe body, + kj::Array urlList = {}, + kj::Maybe> webSocket = kj::none, + BodyEncoding bodyEncoding = BodyEncoding::AUTO); // --------------------------------------------------------------------------- // JS API @@ -1147,8 +962,7 @@ class Response final: public Body { // - We need to be able to call `new Response()`, meaning the body initializer MUST be Optional. // - We need to be able to call `new Response(null)`, but `null` cannot implicitly convert to // an Optional, so we need an inner Maybe to inhibit string coercion to Body::Initializer. - static jsg::Ref constructor( - jsg::Lock& js, + static jsg::Ref constructor(jsg::Lock& js, jsg::Optional> bodyInit, jsg::Optional maybeInit); @@ -1174,17 +988,16 @@ class Response final: public Body { jsg::Ref clone(jsg::Lock& js); static jsg::Ref json_( - jsg::Lock& js, - jsg::JsValue any, - jsg::Optional maybeInit); + jsg::Lock& js, jsg::JsValue any, jsg::Optional maybeInit); struct SendOptions { bool allowWebSocket = false; }; // Helper not exposed to JavaScript. 
- kj::Promise> send( - jsg::Lock& js, kj::HttpService::Response& outer, SendOptions options, + kj::Promise> send(jsg::Lock& js, + kj::HttpService::Response& outer, + SendOptions options, kj::Maybe maybeReqHeaders); int getStatus(); @@ -1253,12 +1066,13 @@ class Response final: public Body { // Use `BodyInit` and `ResponseInit` type aliases in constructor instead of inlining } - void serialize( - jsg::Lock& js, jsg::Serializer& serializer, + void serialize(jsg::Lock& js, + jsg::Serializer& serializer, const jsg::TypeHandler& initDictHandler, const jsg::TypeHandler>>& streamHandler); - static jsg::Ref deserialize( - jsg::Lock& js, rpc::SerializationTag tag, jsg::Deserializer& deserializer, + static jsg::Ref deserialize(jsg::Lock& js, + rpc::SerializationTag tag, + jsg::Deserializer& deserializer, const jsg::TypeHandler& initDictHandler, const jsg::TypeHandler>>& streamHandler); @@ -1269,13 +1083,13 @@ class Response final: public Body { tracker.trackField("headers", headers); tracker.trackField("webSocket", webSocket); tracker.trackField("cf", cf); - for (const auto& url : urlList) { + for (const auto& url: urlList) { tracker.trackField("urlList", url); } tracker.trackField("asyncContext", asyncContext); } -private: + private: int statusCode; kj::String statusText; jsg::Ref headers; @@ -1311,9 +1125,10 @@ class Response final: public Body { }; class FetchEvent final: public ExtendableEvent { -public: + public: FetchEvent(jsg::Ref request) - : ExtendableEvent("fetch"), request(kj::mv(request)), + : ExtendableEvent("fetch"), + request(kj::mv(request)), state(AwaitingRespondWith()) {} kj::Maybe>> getResponsePromise(jsg::Lock& js); @@ -1343,7 +1158,7 @@ class FetchEvent final: public ExtendableEvent { } } -private: + private: jsg::Ref request; struct AwaitingRespondWith {}; @@ -1362,16 +1177,19 @@ class FetchEvent final: public ExtendableEvent { } }; -jsg::Promise> fetchImpl( - jsg::Lock& js, +jsg::Promise> fetchImpl(jsg::Lock& js, kj::Maybe> fetcher, // if null, use 
fetcher from request object Request::Info requestOrUrl, jsg::Optional requestInit); -jsg::Ref makeHttpResponse( - jsg::Lock& js, kj::HttpMethod method, kj::Vector urlList, - uint statusCode, kj::StringPtr statusText, const kj::HttpHeaders& headers, - kj::Own body, kj::Maybe> webSocket, +jsg::Ref makeHttpResponse(jsg::Lock& js, + kj::HttpMethod method, + kj::Vector urlList, + uint statusCode, + kj::StringPtr statusText, + const kj::HttpHeaders& headers, + kj::Own body, + kj::Maybe> webSocket, Response::BodyEncoding bodyEncoding = Response::BodyEncoding::AUTO, kj::Maybe> signal = kj::none); @@ -1382,26 +1200,13 @@ kj::String makeRandomBoundaryCharacters(); // Make a boundary string for FormData serialization. // TODO(cleanup): Move to form-data.{h,c++}? -#define EW_HTTP_ISOLATE_TYPES \ - api::FetchEvent, \ - api::Headers, \ - api::Headers::EntryIterator, \ - api::Headers::EntryIterator::Next, \ - api::Headers::KeyIterator, \ - api::Headers::KeyIterator::Next, \ - api::Headers::ValueIterator, \ - api::Headers::ValueIterator::Next, \ - api::Body, \ - api::Response, \ - api::Response::InitializerDict, \ - api::Request, \ - api::Request::InitializerDict, \ - api::Fetcher, \ - api::Fetcher::PutOptions, \ - api::Fetcher::ScheduledOptions, \ - api::Fetcher::ScheduledResult, \ - api::Fetcher::QueueResult, \ - api::Fetcher::ServiceBindingQueueMessage +#define EW_HTTP_ISOLATE_TYPES \ + api::FetchEvent, api::Headers, api::Headers::EntryIterator, api::Headers::EntryIterator::Next, \ + api::Headers::KeyIterator, api::Headers::KeyIterator::Next, api::Headers::ValueIterator, \ + api::Headers::ValueIterator::Next, api::Body, api::Response, api::Response::InitializerDict, \ + api::Request, api::Request::InitializerDict, api::Fetcher, api::Fetcher::PutOptions, \ + api::Fetcher::ScheduledOptions, api::Fetcher::ScheduledResult, api::Fetcher::QueueResult, \ + api::Fetcher::ServiceBindingQueueMessage // The list of http.h types that are added to worker.c++'s JSG_DECLARE_ISOLATE_TYPE } // 
namespace workerd::api diff --git a/src/workerd/api/r2-bucket.c++ b/src/workerd/api/r2-bucket.c++ index 92a8df0f0a3..1558a59b480 100644 --- a/src/workerd/api/r2-bucket.c++ +++ b/src/workerd/api/r2-bucket.c++ @@ -50,11 +50,11 @@ static kj::Date parseDate(jsg::Lock& js, kj::StringPtr value) { return js.date(value); } -static jsg::ByteString toUTCString(jsg::Lock& js, kj::Date date) { +static kj::String toUTCString(jsg::Lock& js, kj::Date date) { return js.date(date).toUTCString(js); } -static jsg::ByteString toISOString(jsg::Lock& js, kj::Date date) { +static kj::String toISOString(jsg::Lock& js, kj::Date date) { return js.date(date).toISOString(js); } @@ -425,7 +425,7 @@ void initGetOptions(TraceContext& traceContext, jsg::Lock& js, Builder& builder, } KJ_CASE_ONEOF(h, jsg::Ref) { - KJ_IF_SOME(e, h->getNoChecks(js, "range"_kj)) { + KJ_IF_SOME(e, h->getCommon(js, capnp::CommonHeaderName::RANGE)) { builder.setRangeHeader(kj::str(e)); traceContext.userSpan.setTag("cloudflare.r2.request.range"_kjc, kj::str(e)); } @@ -1336,7 +1336,7 @@ kj::Array buildSingleEtagArray(kj::StringPtr etagValue) { R2Bucket::UnwrappedConditional::UnwrappedConditional(jsg::Lock& js, Headers& h) : secondsGranularity(true) { - KJ_IF_SOME(e, h.getNoChecks(js, "if-match"_kj)) { + KJ_IF_SOME(e, h.getCommon(js, capnp::CommonHeaderName::IF_MATCH)) { etagMatches = parseConditionalEtagHeader(kj::str(e)); KJ_IF_SOME(arr, etagMatches) { if (arr.size() == 0) { @@ -1344,7 +1344,7 @@ R2Bucket::UnwrappedConditional::UnwrappedConditional(jsg::Lock& js, Headers& h) } } } - KJ_IF_SOME(e, h.getNoChecks(js, "if-none-match"_kj)) { + KJ_IF_SOME(e, h.getCommon(js, capnp::CommonHeaderName::IF_NONE_MATCH)) { etagDoesNotMatch = parseConditionalEtagHeader(kj::str(e)); KJ_IF_SOME(arr, etagDoesNotMatch) { if (arr.size() == 0) { @@ -1352,11 +1352,11 @@ R2Bucket::UnwrappedConditional::UnwrappedConditional(jsg::Lock& js, Headers& h) } } } - KJ_IF_SOME(d, h.getNoChecks(js, "if-modified-since"_kj)) { + KJ_IF_SOME(d, 
h.getCommon(js, capnp::CommonHeaderName::IF_MODIFIED_SINCE)) { auto date = parseDate(js, d); uploadedAfter = date; } - KJ_IF_SOME(d, h.getNoChecks(js, "if-unmodified-since"_kj)) { + KJ_IF_SOME(d, h.getCommon(js, capnp::CommonHeaderName::IF_UNMODIFIED_SINCE)) { auto date = parseDate(js, d); uploadedBefore = date; } @@ -1384,22 +1384,22 @@ R2Bucket::UnwrappedConditional::UnwrappedConditional(const Conditional& c) R2Bucket::HttpMetadata R2Bucket::HttpMetadata::fromRequestHeaders(jsg::Lock& js, Headers& h) { HttpMetadata result; - KJ_IF_SOME(ct, h.getNoChecks(js, "content-type")) { + KJ_IF_SOME(ct, h.getCommon(js, capnp::CommonHeaderName::CONTENT_TYPE)) { result.contentType = kj::mv(ct); } - KJ_IF_SOME(ce, h.getNoChecks(js, "content-encoding"_kj)) { + KJ_IF_SOME(ce, h.getCommon(js, capnp::CommonHeaderName::CONTENT_ENCODING)) { result.contentEncoding = kj::mv(ce); } - KJ_IF_SOME(cd, h.getNoChecks(js, "content-disposition"_kj)) { + KJ_IF_SOME(cd, h.getCommon(js, capnp::CommonHeaderName::CONTENT_DISPOSITION)) { result.contentDisposition = kj::mv(cd); } - KJ_IF_SOME(cl, h.getNoChecks(js, "content-language"_kj)) { + KJ_IF_SOME(cl, h.getCommon(js, capnp::CommonHeaderName::CONTENT_LANGUAGE)) { result.contentLanguage = kj::mv(cl); } - KJ_IF_SOME(cc, h.getNoChecks(js, "cache-control"_kj)) { + KJ_IF_SOME(cc, h.getCommon(js, capnp::CommonHeaderName::CACHE_CONTROL)) { result.cacheControl = kj::mv(cc); } - KJ_IF_SOME(ceStr, h.getNoChecks(js, "expires"_kj)) { + KJ_IF_SOME(ceStr, h.getCommon(js, capnp::CommonHeaderName::EXPIRES)) { result.cacheExpiry = parseDate(js, ceStr); } @@ -1424,22 +1424,22 @@ void R2Bucket::HeadResult::writeHttpMetadata(jsg::Lock& js, Headers& headers) { const auto& m = KJ_REQUIRE_NONNULL(httpMetadata); KJ_IF_SOME(ct, m.contentType) { - headers.set(js, jsg::ByteString(kj::str("content-type")), jsg::ByteString(kj::str(ct))); + headers.setCommon(capnp::CommonHeaderName::CONTENT_TYPE, kj::str(ct)); } KJ_IF_SOME(cl, m.contentLanguage) { - headers.set(js, 
jsg::ByteString(kj::str("content-language")), jsg::ByteString(kj::str(cl))); + headers.setCommon(capnp::CommonHeaderName::CONTENT_LANGUAGE, kj::str(cl)); } KJ_IF_SOME(cd, m.contentDisposition) { - headers.set(js, jsg::ByteString(kj::str("content-disposition")), jsg::ByteString(kj::str(cd))); + headers.setCommon(capnp::CommonHeaderName::CONTENT_DISPOSITION, kj::str(cd)); } KJ_IF_SOME(ce, m.contentEncoding) { - headers.set(js, jsg::ByteString(kj::str("content-encoding")), jsg::ByteString(kj::str(ce))); + headers.setCommon(capnp::CommonHeaderName::CONTENT_ENCODING, kj::str(ce)); } KJ_IF_SOME(cc, m.cacheControl) { - headers.set(js, jsg::ByteString(kj::str("cache-control")), jsg::ByteString(kj::str(cc))); + headers.setCommon(capnp::CommonHeaderName::CACHE_CONTROL, kj::str(cc)); } KJ_IF_SOME(ce, m.cacheExpiry) { - headers.set(js, jsg::ByteString(kj::str("expires")), toUTCString(js, ce)); + headers.setCommon(capnp::CommonHeaderName::EXPIRES, toUTCString(js, ce)); } } diff --git a/src/workerd/api/trace.c++ b/src/workerd/api/trace.c++ index 4819241dfcb..c8814b4c5ab 100644 --- a/src/workerd/api/trace.c++ +++ b/src/workerd/api/trace.c++ @@ -341,8 +341,7 @@ jsg::Optional> TraceItem::FetchEventInfo::Request::getCf( return detail->cf.map([&](jsg::V8Ref& obj) { return obj.addRef(js); }); } -jsg::Dict TraceItem::FetchEventInfo::Request::getHeaders( - jsg::Lock& js) { +jsg::Dict TraceItem::FetchEventInfo::Request::getHeaders(jsg::Lock& js) { auto shouldRedact = [](kj::StringPtr name) { return ( //(name == "authorization"_kj) || // covered below @@ -351,12 +350,11 @@ jsg::Dict TraceItem::FetchEventInfo::Request:: name.contains("token"_kjc)); }; - using HeaderDict = jsg::Dict; + using HeaderDict = jsg::Dict; auto builder = kj::heapArrayBuilder(detail->headers.size()); for (const auto& header: detail->headers) { auto v = (redacted && shouldRedact(header.name)) ? 
"REDACTED"_kj : header.value; - builder.add( - HeaderDict::Field{jsg::ByteString(kj::str(header.name)), jsg::ByteString(kj::str(v))}); + builder.add(HeaderDict::Field{kj::str(header.name), kj::str(v)}); } // TODO(conform): Better to return a frozen JS Object? diff --git a/src/workerd/api/trace.h b/src/workerd/api/trace.h index a32ba1b708e..e4b26f1c2f3 100644 --- a/src/workerd/api/trace.h +++ b/src/workerd/api/trace.h @@ -220,7 +220,7 @@ class TraceItem::FetchEventInfo::Request final: public jsg::Object { explicit Request(Detail& detail, bool redacted = true); jsg::Optional> getCf(jsg::Lock& js); - jsg::Dict getHeaders(jsg::Lock& js); + jsg::Dict getHeaders(jsg::Lock& js); kj::StringPtr getMethod(); kj::String getUrl(); diff --git a/src/workerd/io/BUILD.bazel b/src/workerd/io/BUILD.bazel index b4ea3eef368..3ae2004e59c 100644 --- a/src/workerd/io/BUILD.bazel +++ b/src/workerd/io/BUILD.bazel @@ -123,6 +123,7 @@ wd_cc_library( "//src/workerd/jsg:script", "//src/workerd/util:checked-queue", "//src/workerd/util:exception", + "//src/workerd/util:header-validation", "//src/workerd/util:ring-buffer", "//src/workerd/util:small-set", "//src/workerd/util:sqlite", diff --git a/src/workerd/jsg/README.md b/src/workerd/jsg/README.md index f9ad0230844..d28f9a99fcd 100644 --- a/src/workerd/jsg/README.md +++ b/src/workerd/jsg/README.md @@ -105,16 +105,15 @@ At the time of writing this, the primitive value types currently supported by th | uint32_t | v8::Uint32 | number | | | uint64_t | v8::BigInt | bigint | | | kj::String(Ptr) | v8::String | string | | -| jsg::ByteString | v8::String | string | See [ByteString][] spec | | kj::Date | v8::Date | Date | | | nullptr | v8::Null | null | See kj::Maybe<T> | | nullptr | v8::Undefined | undefined | See jsg::Optional<T> | Specifically, for example, when mapping from JavaScript into C++, when JSG encounters a -string value, it can convert that into either a `kj::String`, or `jsg::ByteString`, +string value, it can convert that into either a 
`kj::String`, or `jsg::USVString`, depending on what is needed by the C++ layer. Likewise, when translating from C++ to JavaScript, JSG will generate a JavaScript `string` whenever it encounters a `kj::String`, -`kj::StringPtr`, or `jsg::ByteString`. +`kj::StringPtr`, or `jsg::USVString`. JSG will *not* translate JavaScript `string` to `kj::StringPtr`. @@ -1414,7 +1413,6 @@ TODO(soon): TBD ["KJ Style Guide"]: https://github.com/capnproto/capnproto/blob/master/style-guide.md ["KJ Tour"]: https://github.com/capnproto/capnproto/blob/master/kjdoc/tour.md -[ByteString]: https://webidl.spec.whatwg.org/#idl-ByteString [Record]: https://webidl.spec.whatwg.org/#idl-record [Sequence]: https://webidl.spec.whatwg.org/#idl-sequence diff --git a/src/workerd/jsg/fast-api.h b/src/workerd/jsg/fast-api.h index 99e587f808c..c672f50e7ba 100644 --- a/src/workerd/jsg/fast-api.h +++ b/src/workerd/jsg/fast-api.h @@ -26,10 +26,9 @@ namespace workerd::jsg { -class ByteString; class DOMString; -class Lock; class USVString; +class Lock; template class Promise; diff --git a/src/workerd/jsg/jsg.h b/src/workerd/jsg/jsg.h index 64ca92cd218..887c7b281ea 100644 --- a/src/workerd/jsg/jsg.h +++ b/src/workerd/jsg/jsg.h @@ -1083,34 +1083,6 @@ void jsgAddToStructNames(auto& names) { if constexpr (isUsableStructField) names.add(exportedName); } -// TODO(cleanup): This class was meant to be a ByteString (characters in the range [0,255]), but -// its only use so far is in api::Headers. But making the Headers class use ByteStrings turned -// out to be unwise. Nevertheless, it is still useful to keep around in order to provide -// feedback to script authors when they are using header strings that may be incompatible with -// browser implementations of the Fetch spec. -// -// Move this class to the `api` directory and rename to HeaderString. 
-class ByteString: public kj::String { - public: - // Inheriting constructors does not inherit copy/move constructors, so we declare a forwarding - // constructor instead. - template - explicit ByteString(Params&&... params): kj::String(kj::fwd(params)...) {} - - enum class Warning { - NONE, // Contains 7-bit code points -- semantics won't change - CONTAINS_EXTENDED_ASCII, // Contains 8-bit code points -- semantics WILL change - CONTAINS_UNICODE, // Contains 16-bit code points -- semantics WILL change - }; - Warning warning = Warning::NONE; - // HACK: ByteString behaves just like a kj::String, but has this crappy enum to tell the code that - // consumes it that it contains a value which a real Web IDL ByteString would have encoded - // differently. We can't usefully do anything about the information in JSG, because we don't - // have access to the IoContext to print a warning in the inspector. - // - // We default the enum to NONE so that ByteString(kj::str(otherHeader)) works as expected. -}; - // A USVString has the exact same representation as a kj::String, but we guarantee that it meets // the WHATWG definition of a "scalar value string". Particularly, a USVString will never contain // invalid surrogate characters. A USVString should be used when implementing a Web API that diff --git a/src/workerd/jsg/jsvalue.c++ b/src/workerd/jsg/jsvalue.c++ index 4fc19bdef1f..bd0b8a838ab 100644 --- a/src/workerd/jsg/jsvalue.c++ +++ b/src/workerd/jsg/jsvalue.c++ @@ -355,26 +355,6 @@ jsg::USVString JsString::toUSVString(Lock& js) const { return jsg::USVString(kj::mv(buf)); } -jsg::ByteString JsString::toByteString(Lock& js) const { - auto result = jsg::ByteString(toString(js)); - - if (!simdutf::validate_ascii(result.begin(), result.size())) { - // If storage is one-byte or the string contains only one-byte - // characters, we know that it contains extended ASCII characters. 
- // - // The order of execution matters, since ContainsOnlyOneByte() - // will scan the whole string for two-byte storage. - if (inner->ContainsOnlyOneByte()) { - result.warning = ByteString::Warning::CONTAINS_EXTENDED_ASCII; - } else { - // Storage is two-bytes and it contains two-byte characters. - result.warning = ByteString::Warning::CONTAINS_UNICODE; - } - } - - return kj::mv(result); -} - jsg::DOMString JsString::toDOMString(Lock& js) const { auto buf = kj::heapArray(inner->Utf8LengthV2(js.v8Isolate) + 1); inner->WriteUtf8V2(js.v8Isolate, buf.begin(), buf.size(), v8::String::WriteFlags::kNullTerminate); @@ -451,14 +431,14 @@ bool JsRegExp::match(Lock& js, kj::StringPtr input) { return !result->IsNull(); } -jsg::ByteString JsDate::toUTCString(jsg::Lock& js) const { +kj::String JsDate::toUTCString(jsg::Lock& js) const { JsString str(inner->ToUTCString()); - return jsg::ByteString(str.toString(js)); + return str.toString(js); } -jsg::ByteString JsDate::toISOString(jsg::Lock& js) const { +kj::String JsDate::toISOString(jsg::Lock& js) const { JsString str(inner->ToISOString()); - return jsg::ByteString(str.toString(js)); + return str.toString(js); } JsDate::operator kj::Date() const { diff --git a/src/workerd/jsg/jsvalue.h b/src/workerd/jsg/jsvalue.h index aaceee887d6..dbb5e260ab8 100644 --- a/src/workerd/jsg/jsvalue.h +++ b/src/workerd/jsg/jsvalue.h @@ -250,7 +250,6 @@ class JsString final: public JsBase { size_t utf8Length(Lock& js) const KJ_WARN_UNUSED_RESULT; kj::String toString(Lock& js) const KJ_WARN_UNUSED_RESULT; jsg::USVString toUSVString(Lock& js) const KJ_WARN_UNUSED_RESULT; - jsg::ByteString toByteString(Lock& js) const KJ_WARN_UNUSED_RESULT; jsg::DOMString toDOMString(Lock& js) const KJ_WARN_UNUSED_RESULT; int hashCode() const; @@ -303,8 +302,8 @@ class JsRegExp final: public JsBase { class JsDate final: public JsBase { public: - jsg::ByteString toUTCString(Lock& js) const; - jsg::ByteString toISOString(Lock& js) const; + kj::String toUTCString(Lock& 
js) const; + kj::String toISOString(Lock& js) const; operator kj::Date() const; using JsBase::JsBase; }; diff --git a/src/workerd/jsg/rtti-test.c++ b/src/workerd/jsg/rtti-test.c++ index 28b31729e1a..5de39cd88ee 100644 --- a/src/workerd/jsg/rtti-test.c++ +++ b/src/workerd/jsg/rtti-test.c++ @@ -91,7 +91,7 @@ KJ_TEST("string types") { KJ_EXPECT(tType() == "(string = (name = \"kj::String\"))"); KJ_EXPECT(tType() == "(string = (name = \"kj::StringPtr\"))"); KJ_EXPECT(tType() == "(string = (name = \"v8::String\"))"); - KJ_EXPECT(tType() == "(string = (name = \"ByteString\"))"); + KJ_EXPECT(tType() == "(string = (name = \"USVString\"))"); } KJ_TEST("object types") { diff --git a/src/workerd/jsg/rtti.h b/src/workerd/jsg/rtti.h index b3b512a95a0..6e416be0bb2 100644 --- a/src/workerd/jsg/rtti.h +++ b/src/workerd/jsg/rtti.h @@ -246,7 +246,6 @@ FOR_EACH_NUMBER_TYPE(DECLARE_NUMBER_TYPE) F(kj::String) \ F(kj::StringPtr) \ F(v8::String) \ - F(ByteString) \ F(USVString) \ F(DOMString) \ F(jsg::JsString) diff --git a/src/workerd/jsg/value-test.c++ b/src/workerd/jsg/value-test.c++ index c2b7277e45b..d6de6169378 100644 --- a/src/workerd/jsg/value-test.c++ +++ b/src/workerd/jsg/value-test.c++ @@ -851,27 +851,6 @@ KJ_TEST("jsg::DOMStrings") { // ======================================================================================== -struct ByteStringContext: public ContextGlobalObject { - ByteString takeByteString(ByteString s) { - return kj::mv(s); - } - JSG_RESOURCE_TYPE(ByteStringContext) { - JSG_METHOD(takeByteString); - } -}; -JSG_DECLARE_ISOLATE_TYPE(ByteStringIsolate, ByteStringContext); - -KJ_TEST("ByteStrings") { - Evaluator e(v8System); - e.expectEval("takeByteString('foo\\0bar') === 'foo\\0bar'", "boolean", "true"); - // ffi is 0xEF 0xAC 0x83 in UTF-8. - e.expectEval("takeByteString('\\xEF\\xAC\\x83') === '\\xEF\\xAC\\x83'", "boolean", "true"); - - // TODO(cleanup): ByteString should become HeaderString somewhere in the api directory. 
-} - -// ======================================================================================== - struct RawContext: public ContextGlobalObject { struct TwoValues { Value $foo; diff --git a/src/workerd/jsg/value.h b/src/workerd/jsg/value.h index c4a249c9bd4..36ea3514975 100644 --- a/src/workerd/jsg/value.h +++ b/src/workerd/jsg/value.h @@ -455,12 +455,9 @@ class StringWrapper { } template - requires(kj::isSameType() || kj::isSameType() || - kj::isSameType()) + requires(kj::isSameType() || kj::isSameType()) static constexpr const char* getName(T*) { - if constexpr (kj::isSameType()) { - return "ByteString"; - } else if constexpr (kj::isSameType()) { + if constexpr (kj::isSameType()) { return "USVString"; } else if constexpr (kj::isSameType()) { return "DOMString"; @@ -488,8 +485,7 @@ class StringWrapper { } template - requires(kj::isSameType() || kj::isSameType() || - kj::isSameType()) + requires(kj::isSameType() || kj::isSameType()) v8::Local wrap( Lock& js, v8::Local context, kj::Maybe> creator, T value) { // TODO(cleanup): Move to a HeaderStringWrapper in the api directory. 
@@ -497,8 +493,8 @@ class StringWrapper { } template - requires(kj::isSameType() || kj::isSameType() || - kj::isSameType() || kj::isSameType()) + requires(kj::isSameType() || kj::isSameType() || + kj::isSameType()) kj::Maybe tryUnwrap(Lock& js, v8::Local context, v8::Local handle, @@ -512,8 +508,6 @@ class StringWrapper { JsString str(check(handle->ToString(context))); if constexpr (kj::isSameType()) { return str.toString(js); - } else if constexpr (kj::isSameType()) { - return str.toByteString(js); } else if constexpr (kj::isSameType()) { return str.toUSVString(js); } else if constexpr (kj::isSameType()) { diff --git a/src/workerd/jsg/web-idl-test.c++ b/src/workerd/jsg/web-idl-test.c++ index 1b310cc29f5..28f2c49b38f 100644 --- a/src/workerd/jsg/web-idl-test.c++ +++ b/src/workerd/jsg/web-idl-test.c++ @@ -38,7 +38,8 @@ static_assert(webidl::hasDuplicateTypes == true); static_assert(webidl::hasDuplicateTypes == true); static_assert(webidl::hasDuplicateTypes == true); -static_assert(webidl::FlattenedTypeTraits::stringTypeCount == 2); +static_assert(webidl::FlattenedTypeTraits::stringTypeCount == 2); +static_assert(webidl::FlattenedTypeTraits::stringTypeCount == 2); KJ_TEST("web-idl meta") { // Nothing to actually do here; tests are compile-time diff --git a/src/workerd/jsg/web-idl.h b/src/workerd/jsg/web-idl.h index 0087b36bd5c..c50d5f24bbc 100644 --- a/src/workerd/jsg/web-idl.h +++ b/src/workerd/jsg/web-idl.h @@ -113,11 +113,11 @@ constexpr bool isNumericType = isIntegerType || kj::isSameType() || kj::isSameType>(); template -constexpr bool isStringType = kj::isSameType() || kj::isSameType() || - kj::isSameType() || kj::isSameType() || - kj::isSameType>() || kj::isSameType>() || - kj::isSameType>() || kj::isSameType>() || - kj::isSameType>() || kj::isSameType(); +constexpr bool isStringType = kj::isSameType() || kj::isSameType() || + kj::isSameType() || kj::isSameType>() || + kj::isSameType>() || kj::isSameType>() || + kj::isSameType>() || kj::isSameType>() || + 
kj::isSameType(); template constexpr bool isObjectType = diff --git a/src/workerd/tests/bench-api-headers.c++ b/src/workerd/tests/bench-api-headers.c++ index 66a4f0642be..ff0c184c2e8 100644 --- a/src/workerd/tests/bench-api-headers.c++ +++ b/src/workerd/tests/bench-api-headers.c++ @@ -93,11 +93,9 @@ BENCHMARK_F(ApiHeaders, set_append)(benchmark::State& state) { for (int n = 0; n < 13; n++) { auto& h = kHeaders[n]; if (h.append) { - headers->append( - env.js, jsg::ByteString(kj::str(h.name)), jsg::ByteString(kj::str(h.value))); + headers->append(env.js, kj::str(h.name), kj::str(h.value)); } else { - headers->set( - env.js, jsg::ByteString(kj::str(h.name)), jsg::ByteString(kj::str(h.value))); + headers->set(env.js, kj::str(h.name), kj::str(h.value)); } } benchmark::DoNotOptimize(i); diff --git a/src/workerd/tests/bench-response.c++ b/src/workerd/tests/bench-response.c++ index e53d23202cf..0da68a1c4c6 100644 --- a/src/workerd/tests/bench-response.c++ +++ b/src/workerd/tests/bench-response.c++ @@ -57,10 +57,10 @@ BENCHMARK_F(Response, bodyWithHeaders)(benchmark::State& state) { auto& js = env.js; for (auto _: state) { api::Response::InitializerDict init; - jsg::Dict headersDict; - headersDict.fields = kj::heapArray::Field>(1); - headersDict.fields[0].name = jsg::ByteString(kj::str("Content-Type")); - headersDict.fields[0].value = jsg::ByteString(kj::str("text/html")); + jsg::Dict headersDict; + headersDict.fields = kj::heapArray::Field>(1); + headersDict.fields[0].name = kj::str("Content-Type"); + headersDict.fields[0].value = kj::str("text/html"); init.headers = kj::mv(headersDict); auto body = api::Body::Initializer(kj::str("Hello World")); diff --git a/src/workerd/util/BUILD.bazel b/src/workerd/util/BUILD.bazel index 6466a82be07..087d1dad305 100644 --- a/src/workerd/util/BUILD.bazel +++ b/src/workerd/util/BUILD.bazel @@ -260,6 +260,15 @@ wd_cc_library( deps = ["@capnp-cpp//src/kj"], ) +wd_cc_library( + name = "header-validation", + hdrs = ["header-validation.h"], + 
visibility = ["//visibility:public"], + deps = [ + "@capnp-cpp//src/kj", + ], +) + wd_cc_library( name = "websocket-error-handler", srcs = ["websocket-error-handler.c++"], diff --git a/src/workerd/util/header-validation.h b/src/workerd/util/header-validation.h new file mode 100644 index 00000000000..1a0b934790c --- /dev/null +++ b/src/workerd/util/header-validation.h @@ -0,0 +1,267 @@ +// Copyright (c) 2017-2025 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +#pragma once + +#include +#include + +#include + +// clang-tidy doesn't like some of our use of intrinsics here. +// NOLINTBEGIN + +// Platform-specific intrinsics headers +#if defined(__AVX2__) +#include +#elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86) +#include +#endif + +#if defined(__ARM_NEON) || defined(__aarch64__) +#include +#endif + +namespace workerd::util { + +// SIMD-accelerated validation for HTTP header values. +// Checks that the value contains no NULL (0x00), CR (0x0D), or LF (0x0A) characters. +// Returns true if the value is valid, false otherwise. 
+// +// This function automatically selects the best implementation based on available CPU features: +// - AVX2 for modern x86/x64 (32 bytes per iteration) +// - SSE2 for older x86/x64 (16 bytes per iteration) +// - NEON for ARM/ARM64 (16 bytes per iteration) +// - Scalar fallback for unsupported platforms or short strings + +#if defined(__AVX2__) +// AVX2 implementation: Process 32 bytes at a time +inline bool isValidHeaderValueSIMD_AVX2(const char* ptr, size_t len) { + const __m256i zero = _mm256_setzero_si256(); + const __m256i cr = _mm256_set1_epi8('\r'); + const __m256i lf = _mm256_set1_epi8('\n'); + + while (len >= 32) { + __m256i chunk = _mm256_loadu_si256(reinterpret_cast(ptr)); + + // Compare against invalid characters + __m256i nulls = _mm256_cmpeq_epi8(chunk, zero); + __m256i crs = _mm256_cmpeq_epi8(chunk, cr); + __m256i lfs = _mm256_cmpeq_epi8(chunk, lf); + + // Combine: any match means invalid + __m256i invalid = _mm256_or_si256(_mm256_or_si256(nulls, crs), lfs); + + // Check if any byte matched (non-zero mask means invalid char found) + uint32_t mask = _mm256_movemask_epi8(invalid); + if (__builtin_expect(mask != 0, 0)) { + return false; + } + + ptr += 32; + len -= 32; + } + + // Process remaining 16-31 bytes with SSE2 if available + if (len >= 16) { + __m128i chunk = _mm_loadu_si128(reinterpret_cast(ptr)); + __m128i zero_sse = _mm_setzero_si128(); + __m128i cr_sse = _mm_set1_epi8('\r'); + __m128i lf_sse = _mm_set1_epi8('\n'); + + __m128i nulls = _mm_cmpeq_epi8(chunk, zero_sse); + __m128i crs = _mm_cmpeq_epi8(chunk, cr_sse); + __m128i lfs = _mm_cmpeq_epi8(chunk, lf_sse); + + __m128i invalid = _mm_or_si128(_mm_or_si128(nulls, crs), lfs); + + int mask = _mm_movemask_epi8(invalid); + if (__builtin_expect(mask != 0, 0)) { + return false; + } + + ptr += 16; + len -= 16; + } + + // Scalar fallback for remaining 0-15 bytes + for (size_t i = 0; i < len; ++i) { + char c = ptr[i]; + if (c == '\0' || c == '\r' || c == '\n') { + return false; + } + } + + return 
true; +} +#endif // __AVX2__ + +#if defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86) +// SSE2 implementation: Process 16 bytes at a time +inline bool isValidHeaderValueSIMD_SSE2(const char* ptr, size_t len) { + const __m128i zero = _mm_setzero_si128(); + const __m128i cr = _mm_set1_epi8('\r'); + const __m128i lf = _mm_set1_epi8('\n'); + + while (len >= 16) { + __m128i chunk = _mm_loadu_si128(reinterpret_cast(ptr)); + + // Compare against invalid characters + __m128i nulls = _mm_cmpeq_epi8(chunk, zero); + __m128i crs = _mm_cmpeq_epi8(chunk, cr); + __m128i lfs = _mm_cmpeq_epi8(chunk, lf); + + // Combine: any match means invalid + __m128i invalid = _mm_or_si128(_mm_or_si128(nulls, crs), lfs); + + // Check if any byte matched (non-zero mask means invalid char found) + int mask = _mm_movemask_epi8(invalid); + if (__builtin_expect(mask != 0, 0)) { + return false; + } + + ptr += 16; + len -= 16; + } + + // Scalar fallback for remaining 0-15 bytes + for (size_t i = 0; i < len; ++i) { + char c = ptr[i]; + if (c == '\0' || c == '\r' || c == '\n') { + return false; + } + } + + return true; +} +#endif // SSE2 + +#if defined(__ARM_NEON) || defined(__aarch64__) +// ARM NEON implementation: Process 16 bytes at a time +inline bool isValidHeaderValueSIMD_NEON(const char* ptr, size_t len) { + const uint8x16_t zero = vdupq_n_u8(0); + const uint8x16_t cr = vdupq_n_u8('\r'); + const uint8x16_t lf = vdupq_n_u8('\n'); + + while (len >= 16) { + uint8x16_t chunk = vld1q_u8(reinterpret_cast(ptr)); + + // Compare operations + uint8x16_t is_null = vceqq_u8(chunk, zero); + uint8x16_t is_cr = vceqq_u8(chunk, cr); + uint8x16_t is_lf = vceqq_u8(chunk, lf); + + // Combine + uint8x16_t invalid = vorrq_u8(vorrq_u8(is_null, is_cr), is_lf); + + // Check if any lane is set (vmaxvq returns maximum value across vector) + if (__builtin_expect(vmaxvq_u8(invalid) != 0, 0)) { + return false; + } + + ptr += 16; + len -= 16; + } + + // Scalar fallback for remaining 0-15 bytes + for 
(size_t i = 0; i < len; ++i) { + char c = ptr[i]; + if (c == '\0' || c == '\r' || c == '\n') { + return false; + } + } + + return true; +} +#endif // ARM_NEON + +// Scalar fallback implementation for platforms without SIMD support +inline bool isValidHeaderValueScalar(const char* ptr, size_t len) { + for (size_t i = 0; i < len; ++i) { + char c = ptr[i]; + if (c == '\0' || c == '\r' || c == '\n') { + return false; + } + } + return true; +} + +// Main entry point: Automatically dispatches to the best available implementation +inline bool isValidHeaderValue(kj::ArrayPtr value) { + const char* ptr = value.begin(); + size_t len = value.size(); + + // Empty strings are valid + if (len == 0) return true; + + // Dispatch to best available implementation +#if defined(__AVX2__) + return isValidHeaderValueSIMD_AVX2(ptr, len); +#elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) + return isValidHeaderValueSIMD_SSE2(ptr, len); +#elif defined(__ARM_NEON) || defined(__aarch64__) + return isValidHeaderValueSIMD_NEON(ptr, len); +#else + return isValidHeaderValueScalar(ptr, len); +#endif +} + +// Bitfield flags for HTTP character lookup table +constexpr uint8_t HTTP_TOKEN_CHAR = 0x01; // Valid HTTP token character +constexpr uint8_t HTTP_WHITESPACE = 0x02; // HTTP whitespace (tab, space, CR, LF) + +// Fast lookup table for HTTP character validation using bitfields (RFC 2616). +// Combines checks for: token chars and HTTP whitespace. 
+// Valid token chars are: !#$%&'*+-.0-9A-Z^_`a-z|~ +// (i.e., any CHAR except CTLs or separators) +// HTTP whitespace chars are: tab, space, CR, LF +static constexpr uint8_t HTTP_TOKEN_CHAR_TABLE[] = { + // Control characters 0x00-0x1F and 0x7F are invalid + 0, 0, 0, 0, 0, 0, 0, 0, // 0x00-0x07 + 0, 2, 2, 0, 0, 2, 0, 0, // 0x08-0x0F (tab=2, LF=2, CR=2) + 0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x17 + 0, 0, 0, 0, 0, 0, 0, 0, // 0x18-0x1F + 2, 1, 0, 1, 1, 1, 1, 1, // 0x20-0x27: SP!"#$%&' + 0, 0, 1, 1, 0, 1, 1, 0, // 0x28-0x2F: ()*+,-./ + 1, 1, 1, 1, 1, 1, 1, 1, // 0x30-0x37: 01234567 + 1, 1, 0, 0, 0, 0, 0, 0, // 0x38-0x3F: 89:;<=>? + 0, 1, 1, 1, 1, 1, 1, 1, // 0x40-0x47: @ABCDEFG + 1, 1, 1, 1, 1, 1, 1, 1, // 0x48-0x4F: HIJKLMNO + 1, 1, 1, 1, 1, 1, 1, 1, // 0x50-0x57: PQRSTUVW + 1, 1, 1, 0, 0, 0, 1, 1, // 0x58-0x5F: XYZ[\]^_ + 1, 1, 1, 1, 1, 1, 1, 1, // 0x60-0x67: `abcdefg + 1, 1, 1, 1, 1, 1, 1, 1, // 0x68-0x6F: hijklmno + 1, 1, 1, 1, 1, 1, 1, 1, // 0x70-0x77: pqrstuvw + 1, 1, 1, 0, 1, 0, 1, 0, // 0x78-0x7F: xyz{|}~DEL + // Extended ASCII 0x80-0xFF are all invalid per RFC 2616 + 0, 0, 0, 0, 0, 0, 0, 0, // 0x80-0x87 + 0, 0, 0, 0, 0, 0, 0, 0, // 0x88-0x8F + 0, 0, 0, 0, 0, 0, 0, 0, // 0x90-0x97 + 0, 0, 0, 0, 0, 0, 0, 0, // 0x98-0x9F + 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0-0xA7 + 0, 0, 0, 0, 0, 0, 0, 0, // 0xA8-0xAF + 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0-0xB7 + 0, 0, 0, 0, 0, 0, 0, 0, // 0xB8-0xBF + 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0-0xC7 + 0, 0, 0, 0, 0, 0, 0, 0, // 0xC8-0xCF + 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0-0xD7 + 0, 0, 0, 0, 0, 0, 0, 0, // 0xD8-0xDF + 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0-0xE7 + 0, 0, 0, 0, 0, 0, 0, 0, // 0xE8-0xEF + 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0-0xF7 + 0, 0, 0, 0, 0, 0, 0, 0, // 0xF8-0xFF +}; + +inline constexpr bool isHttpWhitespace(char c) { + return HTTP_TOKEN_CHAR_TABLE[static_cast(c)] & HTTP_WHITESPACE; +} +static_assert(isHttpWhitespace(' ')); +static_assert(!isHttpWhitespace('A')); +inline constexpr bool isHttpTokenChar(char c) { + return 
HTTP_TOKEN_CHAR_TABLE[static_cast(c)] & HTTP_TOKEN_CHAR; +} +static_assert(isHttpTokenChar('A')); +static_assert(!isHttpTokenChar(' ')); +} // namespace workerd::util +// NOLINTEND diff --git a/src/wpt/fetch/api-test.ts b/src/wpt/fetch/api-test.ts index 00c4fdd061b..ed239bf3a18 100644 --- a/src/wpt/fetch/api-test.ts +++ b/src/wpt/fetch/api-test.ts @@ -392,15 +392,8 @@ export default { 'headers/headers-errors.any.js': { comment: 'Our validation of header names is too lax', expectedFailures: [ - 'Create headers giving bad header name as init argument', 'Create headers giving bad header value as init argument', - 'Check headers get with an invalid name invalidĀ', - 'Check headers delete with an invalid name invalidĀ', - 'Check headers has with an invalid name invalidĀ', - 'Check headers set with an invalid name invalidĀ', 'Check headers set with an invalid value invalidĀ', - 'Check headers append with an invalid name invalidĀ', - 'Check headers append with an invalid name [object Object]', 'Check headers append with an invalid value invalidĀ', ], },