44
55#include < workerd/io/features.h>
66#include < workerd/io/io-context.h>
7+ #include < workerd/util/header-validation.h>
78#include < workerd/util/strings.h>
89
9- #ifdef _MSC_VER
10- #define strncasecmp _strnicmp
11- #define strcasecmp _stricmp
12- #endif
13-
1410namespace workerd ::api {
1511
1612namespace {
1713// If any more headers are added to the CommonHeaderName enum later, we should be careful about
1814// introducing them into serialization. We need to roll out a change that recognizes the new IDs
1915// before rolling out a change that sends them. MAX_COMMON_HEADER_ID is the max value we're willing
2016// to send.
21- static constexpr size_t MAX_COMMON_HEADER_ID =
17+ constexpr size_t MAX_COMMON_HEADER_ID =
2218 static_cast <size_t >(capnp::CommonHeaderName::WWW_AUTHENTICATE);
2319
2420#define COMMON_HEADERS (V ) \
@@ -74,47 +70,98 @@ static constexpr size_t MAX_COMMON_HEADER_ID =
7470// and must be kept in sync with the ordinal values defined in http-over-capnp.capnp). Since
7571// it is extremely unlikely that those will change often, we hardcode them here for runtime
7672// efficiency.
77- #define V (Name ) Name,
78- static constexpr const char * COMMON_HEADER_NAMES[] = {nullptr , // 0: invalid
73+ //
74+ // TODO(perf): We can potentially optimize this further by using the mechanisms within
75+ // http-over-capnp, which also has a mapping of common header names to kj::HttpHeaderIds.
76+ // However, accessing that functionality requires some amount of new API to be added to
77+ // capnproto which needs to be carefully weighed. There's also the fact that, currently,
78+ // the HttpOverCapnpFactory is accessed via IoContext and the Headers object can be
79+ // created outside of an IoContext. Some amount of additional refactoring would be needed
80+ // to make it work. For now, this hardcoded table is sufficient and efficient enough.
81+ #define V (Name ) Name##_kj,
82+ constexpr kj::StringPtr COMMON_HEADER_NAMES[] = {nullptr , // 0: invalid
7983 COMMON_HEADERS (V)};
8084#undef V
8185
82- constexpr size_t constexprStrlen (const char * str) {
83- return *str ? 1 + constexprStrlen (str + 1 ) : 0 ;
86+ inline constexpr kj::StringPtr getCommonHeaderName (uint id) {
87+ KJ_ASSERT (id > 0 && id <= MAX_COMMON_HEADER_ID, " Invalid common header ID" );
88+ return COMMON_HEADER_NAMES[id];
8489}
8590
86- // Helper to avoid recalculating lengths of common headers at runtime repeatedly
87- static constexpr size_t COMMON_HEADER_NAME_LENGTHS[] = {0 , // 0: invalid (nullptr)
88- #define V (n ) constexprStrlen(n),
89- COMMON_HEADERS (V)
90- #undef V
91- };
91+ constexpr bool strcaseeq (kj::StringPtr a, kj::StringPtr b) {
92+ if (a.size () != b.size ()) return false ;
93+ for (size_t i = 0 ; i < a.size (); ++i) {
94+ char ca = a[i];
95+ char cb = b[i];
96+ // Convert to lowercase for comparison
97+ if (' A' <= ca && ca <= ' Z' ) ca += 32 ;
98+ if (' A' <= cb && cb <= ' Z' ) cb += 32 ;
99+ if (ca != cb) return false ;
100+ }
101+ return true ;
102+ }
92103
93- inline constexpr kj::StringPtr getCommonHeaderName (uint id) {
94- KJ_ASSERT (id > 0 && id <= MAX_COMMON_HEADER_ID, " Invalid common header ID" );
95- kj::StringPtr name = COMMON_HEADER_NAMES[id];
96- KJ_DASSERT (name != nullptr );
97- return name;
98- }
99-
100- // Case-insensitive lookup of common header ID. This avoids allocating a lowercase copy
101- // when the header is common. Returns kj::none if not a common header.
102- // TODO(perf): It's possible to optimize this further with a good hash function but
103- // for now a linear scan is sufficient.
104- constexpr kj::Maybe<uint> getCommonHeaderId (kj::StringPtr name) {
105- size_t len = name.size ();
106- if (len == 0 ) return kj::none;
107- for (uint i = 1 ; i <= MAX_COMMON_HEADER_ID; ++i) {
108- KJ_DASSERT (COMMON_HEADER_NAMES[i] != nullptr );
109- // If the lengths don't match or the first character doesn't match, skip full comparison
110- if (len != COMMON_HEADER_NAME_LENGTHS[i]) continue ;
111- if (strncasecmp (name.begin (), COMMON_HEADER_NAMES[i], len) == 0 ) {
112- return i;
113- }
104+ constexpr uint caseInsensitiveHash (kj::StringPtr name) {
105+ uint hash = 2166136261u ;
106+ for (size_t i = 0 ; i < name.size (); ++i) {
107+ char c = name[i];
108+ if (' A' <= c && c <= ' Z' ) c += 32 ;
109+ hash ^= static_cast <uint8_t >(c);
110+ hash *= 16777619u ;
114111 }
115- return kj::none ;
112+ return hash ;
116113}
117114
115+ constexpr size_t HEADER_MAP_SIZE = 128 ;
116+
117+ // Constexpr hash table for case-insensitive mapping of header names to their
118+ // common header id (if any).
119+ struct HeaderHashTable final {
120+ struct Entry {
121+ kj::StringPtr name;
122+ uint id;
123+ };
124+
125+ Entry entries[HEADER_MAP_SIZE] = {};
126+
127+ constexpr HeaderHashTable () {
128+ for (size_t i = 0 ; i < HEADER_MAP_SIZE; ++i) {
129+ entries[i] = {nullptr , 0 };
130+ }
131+
132+ for (uint i = 1 ; i <= MAX_COMMON_HEADER_ID; ++i) {
133+ auto name = COMMON_HEADER_NAMES[i];
134+ size_t slot = caseInsensitiveHash (name) % HEADER_MAP_SIZE;
135+ while (entries[slot].id != 0 ) {
136+ slot = (slot + 1 ) % HEADER_MAP_SIZE;
137+ }
138+ entries[slot] = {name, i};
139+ }
140+ }
141+
142+ constexpr uint find (kj::StringPtr name) const {
143+ if (name == nullptr ) return 0 ;
144+
145+ size_t slot = caseInsensitiveHash (name) % HEADER_MAP_SIZE;
146+
147+ // Linear probe until we find a match or empty slot
148+ for (size_t probes = 0 ; probes < HEADER_MAP_SIZE; ++probes) {
149+ const auto & entry = entries[slot];
150+ if (entry.id == 0 ) return 0 ;
151+ if (entry.name .size () == name.size () && strcaseeq (entry.name , name)) {
152+ return entry.id ;
153+ }
154+ slot = (slot + 1 ) % HEADER_MAP_SIZE;
155+ }
156+ return 0 ; // Not found
157+ }
158+ };
159+
160+ constexpr HeaderHashTable HEADER_HASH_TABLE;
161+ // Quick check to verify that the hash table is constructed correctly.
162+ static_assert (HEADER_HASH_TABLE.find(" accept-charset" _kj) == 1 );
163+ static_assert (HEADER_HASH_TABLE.find(" AcCePt-ChArSeT" _kj) == 1 );
164+
118165static_assert (std::size(COMMON_HEADER_NAMES) == (MAX_COMMON_HEADER_ID + 1 ));
119166
120167void warnIfBadHeaderString (const jsg::ByteString& byteString) {
@@ -156,14 +203,15 @@ void warnIfBadHeaderString(const jsg::ByteString& byteString) {
156203 }
157204}
158205
159- // TODO(perf): This can be optimized further using a lookup table.
160- constexpr bool isHttpWhitespace (char c) {
161- return c == ' \t ' || c == ' \r ' || c == ' \n ' || c == ' ' ;
206+ inline constexpr void requireValidHeaderValue (kj::StringPtr value) {
207+ JSG_REQUIRE (workerd::util::isValidHeaderValue (value), TypeError, " Invalid header value." );
162208}
163209
164- // TODO(perf): This can be optimized further using a lookup table.
165- constexpr bool isValidHeaderValueChar (char c) {
166- return c != ' \0 ' && c != ' \r ' && c != ' \n ' ;
210+ inline constexpr void requireValidHeaderName (const jsg::ByteString& name) {
211+ warnIfBadHeaderString (name);
212+ for (char c: name) {
213+ JSG_REQUIRE (util::isHttpTokenChar (c), TypeError, " Invalid header name." );
214+ }
167215}
168216
169217// Left- and right-trim HTTP whitespace from `value`.
@@ -175,105 +223,33 @@ jsg::ByteString normalizeHeaderValue(jsg::Lock& js, jsg::ByteString value) {
175223 char * begin = value.begin ();
176224 char * end = value.end ();
177225
178- while (begin < end && isHttpWhitespace (*begin)) ++begin;
179- while (begin < end && isHttpWhitespace (*(end - 1 ))) --end;
226+ while (begin < end && util:: isHttpWhitespace (*begin)) ++begin;
227+ while (begin < end && util:: isHttpWhitespace (*(end - 1 ))) --end;
180228
181229 size_t newSize = end - begin;
182230 if (newSize == value.size ()) return kj::mv (value);
183231
184232 return jsg::ByteString (kj::str (kj::ArrayPtr (begin, newSize)));
185233}
186-
187- // Fast lookup table for valid HTTP token characters (RFC 2616).
188- // Valid token chars are: !#$%&'*+-.0-9A-Z^_`a-z|~
189- // (i.e., any CHAR except CTLs or separators)
190- static constexpr uint8_t HTTP_TOKEN_CHAR_TABLE[] = {
191- // Control characters 0x00-0x1F and 0x7F are invalid
192- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0x00-0x07
193- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0x08-0x0F
194- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0x10-0x17
195- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0x18-0x1F
196- 0 , 1 , 0 , 1 , 1 , 1 , 1 , 1 , // 0x20-0x27: SP!"#$%&'
197- 0 , 0 , 1 , 1 , 0 , 1 , 1 , 0 , // 0x28-0x2F: ()*+,-./
198- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , // 0x30-0x37: 01234567
199- 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , // 0x38-0x3F: 89:;<=>?
200- 0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , // 0x40-0x47: @ABCDEFG
201- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , // 0x48-0x4F: HIJKLMNO
202- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , // 0x50-0x57: PQRSTUVW
203- 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , // 0x58-0x5F: XYZ[\]^_
204- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , // 0x60-0x67: `abcdefg
205- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , // 0x68-0x6F: hijklmno
206- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , // 0x70-0x77: pqrstuvw
207- 1 , 1 , 1 , 0 , 1 , 0 , 1 , 0 , // 0x78-0x7F: xyz{|}~DEL
208- // Extended ASCII 0x80-0xFF are all invalid per RFC 2616
209- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0x80-0x87
210- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0x88-0x8F
211- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0x90-0x97
212- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0x98-0x9F
213- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0xA0-0xA7
214- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0xA8-0xAF
215- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0xB0-0xB7
216- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0xB8-0xBF
217- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0xC0-0xC7
218- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0xC8-0xCF
219- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0xD0-0xD7
220- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0xD8-0xDF
221- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0xE0-0xE7
222- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0xE8-0xEF
223- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0xF0-0xF7
224- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // 0xF8-0xFF
225- };
226-
227- inline void requireValidHeaderName (const jsg::ByteString& name) {
228- // TODO(cleanup): Code duplication with kj/compat/http.c++
229- warnIfBadHeaderString (name);
230-
231- for (char c: name) {
232- JSG_REQUIRE (HTTP_TOKEN_CHAR_TABLE[static_cast <uint8_t >(c)], TypeError, " Invalid header name." );
233- }
234- }
235-
236- inline void requireValidHeaderValue (kj::StringPtr value) {
237- for (char c: value) {
238- JSG_REQUIRE (isValidHeaderValueChar (c), TypeError, " Invalid header value." );
239- }
240- }
241234} // namespace
242235
243- Headers::UncommonHeaderKey::UncommonHeaderKey (kj::String name)
244- : name(kj::mv(name)),
245- hash (kj::hashCode(this ->name)) {}
246-
247- Headers::UncommonHeaderKey::UncommonHeaderKey (kj::StringPtr name)
248- : name(kj::str(name)),
249- hash(kj::hashCode(this ->name)) {}
250-
251- bool Headers::UncommonHeaderKey::operator ==(const UncommonHeaderKey& other) const {
252- // The same hash code is a necessary but not sufficient condition for equality.
253- return hash == other.hash && name == other.name ;
254- }
255-
256- bool Headers::UncommonHeaderKey::operator ==(kj::StringPtr otherName) const {
257- if (name.size () != otherName.size ()) return false ;
258- return strncasecmp (name.begin (), otherName.begin (), name.size ()) == 0 ;
259- }
260-
261236Headers::HeaderKey Headers::getHeaderKeyFor (kj::StringPtr name) {
262- KJ_IF_SOME (commonId, getCommonHeaderId (name)) {
237+ if (uint commonId = HEADER_HASH_TABLE.find (name)) {
238+ KJ_DASSERT (commonId > 0 && commonId <= MAX_COMMON_HEADER_ID);
263239 return commonId;
264240 }
265241
266242 // Not a common header, so allocate lowercase copy for uncommon header
267- return UncommonHeaderKey ( toLower (name) );
243+ return toLower (name);
268244}
269245
270246Headers::HeaderKey Headers::cloneHeaderKey (const HeaderKey& key) {
271247 KJ_SWITCH_ONEOF (key) {
272248 KJ_CASE_ONEOF (commonId, uint) {
273249 return commonId;
274250 }
275- KJ_CASE_ONEOF (uncommonKey, UncommonHeaderKey ) {
276- return uncommonKey. clone ( );
251+ KJ_CASE_ONEOF (uncommonKey, kj::String ) {
252+ return kj::str (uncommonKey );
277253 }
278254 }
279255 KJ_UNREACHABLE;
@@ -284,7 +260,7 @@ bool Headers::isSetCookie(const HeaderKey& key) {
284260 KJ_CASE_ONEOF (commonId, uint) {
285261 return commonId == static_cast <uint>(capnp::CommonHeaderName::SET_COOKIE);
286262 }
287- KJ_CASE_ONEOF (uncommonKey, UncommonHeaderKey ) {
263+ KJ_CASE_ONEOF (uncommonKey, kj::String ) {
288264 // This case really shouldn't happen since "set-cookie" is a common header,
289265 // but just in case...
290266 return uncommonKey == " set-cookie" ;
@@ -294,21 +270,7 @@ bool Headers::isSetCookie(const HeaderKey& key) {
294270}
295271
296272bool Headers::headerKeyEquals (const HeaderKey& a, const HeaderKey& b) {
297- KJ_SWITCH_ONEOF (a) {
298- KJ_CASE_ONEOF (aCommonId, uint) {
299- KJ_IF_SOME (bCommonId, b.tryGet <uint>()) {
300- return aCommonId == bCommonId;
301- }
302- return false ;
303- }
304- KJ_CASE_ONEOF (aUncommonKey, UncommonHeaderKey) {
305- KJ_IF_SOME (bUncommonKey, b.tryGet <UncommonHeaderKey>()) {
306- return aUncommonKey == bUncommonKey;
307- }
308- return false ;
309- }
310- }
311- KJ_UNREACHABLE;
273+ return a == b;
312274}
313275
314276Headers::Header::Header (jsg::ByteString name, kj::Vector<jsg::ByteString> values)
@@ -354,8 +316,8 @@ kj::StringPtr Headers::Header::Header::getKeyName() const {
354316 KJ_CASE_ONEOF (commonId, uint) {
355317 return COMMON_HEADER_NAMES[commonId];
356318 }
357- KJ_CASE_ONEOF (uncommonKey, UncommonHeaderKey ) {
358- return uncommonKey. getName () ;
319+ KJ_CASE_ONEOF (uncommonKey, kj::String ) {
320+ return uncommonKey;
359321 }
360322 }
361323 KJ_UNREACHABLE;
@@ -394,15 +356,15 @@ kj::uint Headers::HeaderCallbacks::hashCode(const HeaderKey& key) {
394356 KJ_CASE_ONEOF (commonId, uint) {
395357 return kj::hashCode (commonId);
396358 }
397- KJ_CASE_ONEOF (uncommonKey, UncommonHeaderKey ) {
398- return uncommonKey. hashCode ();
359+ KJ_CASE_ONEOF (uncommonKey, kj::String ) {
360+ return kj:: hashCode (uncommonKey );
399361 }
400362 }
401363 KJ_UNREACHABLE;
402364}
403365
404366kj::uint Headers::HeaderCallbacks::hashCode (capnp::CommonHeaderName commondId) {
405- return kj::hashCode (static_cast <uint>( commondId) );
367+ return kj::hashCode (commondId);
406368}
407369
408370Headers::Headers (jsg::Lock& js, jsg::Dict<jsg::ByteString, jsg::ByteString> dict)
@@ -580,7 +542,7 @@ kj::ArrayPtr<jsg::ByteString> Headers::getSetCookie() {
580542kj::ArrayPtr<jsg::ByteString> Headers::getAll (jsg::ByteString name) {
581543 requireValidHeaderName (name);
582544
583- if (strcasecmp (name. cStr () , " set-cookie" ) != 0 ) {
545+ if (! strcaseeq (name, " set-cookie" _kj) ) {
584546 JSG_FAIL_REQUIRE (TypeError, " getAll() can only be used with the header name \" Set-Cookie\" ." );
585547 }
586548
@@ -776,7 +738,7 @@ void Headers::serialize(jsg::Lock& js, jsg::Serializer& serializer) {
776738 KJ_CASE_ONEOF (commonId, uint) {
777739 serializer.writeRawUint32 (commonId);
778740 }
779- KJ_CASE_ONEOF (uncommonKey, UncommonHeaderKey ) {
741+ KJ_CASE_ONEOF (_, kj::String ) {
780742 serializer.writeRawUint32 (0 );
781743 serializer.writeLengthDelimited (header.getHeaderName ());
782744 }
0 commit comments