Skip to content

Commit 37b85f0

Browse files
committed
Refine the header validation performance further
1 parent fb81688 commit 37b85f0

File tree

5 files changed

+383
-188
lines changed

5 files changed

+383
-188
lines changed

src/workerd/api/headers.c++

Lines changed: 108 additions & 146 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,17 @@
44

55
#include <workerd/io/features.h>
66
#include <workerd/io/io-context.h>
7+
#include <workerd/util/header-validation.h>
78
#include <workerd/util/strings.h>
89

9-
#ifdef _MSC_VER
10-
#define strncasecmp _strnicmp
11-
#define strcasecmp _stricmp
12-
#endif
13-
1410
namespace workerd::api {
1511

1612
namespace {
1713
// If any more headers are added to the CommonHeaderName enum later, we should be careful about
1814
// introducing them into serialization. We need to roll out a change that recognizes the new IDs
1915
// before rolling out a change that sends them. MAX_COMMON_HEADER_ID is the max value we're willing
2016
// to send.
21-
static constexpr size_t MAX_COMMON_HEADER_ID =
17+
constexpr size_t MAX_COMMON_HEADER_ID =
2218
static_cast<size_t>(capnp::CommonHeaderName::WWW_AUTHENTICATE);
2319

2420
#define COMMON_HEADERS(V) \
@@ -74,47 +70,98 @@ static constexpr size_t MAX_COMMON_HEADER_ID =
7470
// and must be kept in sync with the ordinal values defined in http-over-capnp.capnp). Since
7571
// it is extremely unlikely that those will change often, we hardcode them here for runtime
7672
// efficiency.
77-
#define V(Name) Name,
78-
static constexpr const char* COMMON_HEADER_NAMES[] = {nullptr, // 0: invalid
73+
//
74+
// TODO(perf): We can potentially optimize this further by using the mechanisms within
75+
// http-over-capnp, which also has a mapping of common header names to kj::HttpHeaderIds.
76+
// However, accessing that functionality requires some amount of new API to be added to
77+
// capnproto which needs to be carefully weighed. There's also the fact that, currently,
78+
// the HttpOverCapnpFactory is accessed via IoContext and the Headers object can be
79+
// created outside of an IoContext. Some amount of additional refactoring would be needed
80+
// to make it work. For now, this hardcoded table is sufficient and efficient enough.
81+
#define V(Name) Name##_kj,
82+
constexpr kj::StringPtr COMMON_HEADER_NAMES[] = {nullptr, // 0: invalid
7983
COMMON_HEADERS(V)};
8084
#undef V
8185

82-
constexpr size_t constexprStrlen(const char* str) {
83-
return *str ? 1 + constexprStrlen(str + 1) : 0;
86+
// Returns the entry of COMMON_HEADER_NAMES for the given common header id.
//
// Index 0 of that table is the "invalid" placeholder, so only ids in the range
// [1, MAX_COMMON_HEADER_ID] are accepted; any other value fails the assertion below
// instead of indexing the table.
inline constexpr kj::StringPtr getCommonHeaderName(uint id) {
87+
KJ_ASSERT(id > 0 && id <= MAX_COMMON_HEADER_ID, "Invalid common header ID");
88+
return COMMON_HEADER_NAMES[id];
8489
}
8590

86-
// Helper to avoid recalculating lengths of common headers at runtime repeatedly
87-
static constexpr size_t COMMON_HEADER_NAME_LENGTHS[] = {0, // 0: invalid (nullptr)
88-
#define V(n) constexprStrlen(n),
89-
COMMON_HEADERS(V)
90-
#undef V
91-
};
91+
// Case-insensitive equality of two strings, usable at compile time.
//
// Only the ASCII letters 'A'..'Z' are folded to lower case; every other byte has to
// match exactly. Strings of different length are never equal, and two empty strings
// compare equal (the loop body never runs).
constexpr bool strcaseeq(kj::StringPtr a, kj::StringPtr b) {
92+
if (a.size() != b.size()) return false;
93+
for (size_t i = 0; i < a.size(); ++i) {
94+
char ca = a[i];
95+
char cb = b[i];
96+
// Convert to lowercase for comparison: adding 32 maps 'A'..'Z' onto 'a'..'z'.
97+
if ('A' <= ca && ca <= 'Z') ca += 32;
98+
if ('A' <= cb && cb <= 'Z') cb += 32;
99+
if (ca != cb) return false;
100+
}
101+
return true;
102+
}
92103

93-
inline constexpr kj::StringPtr getCommonHeaderName(uint id) {
94-
KJ_ASSERT(id > 0 && id <= MAX_COMMON_HEADER_ID, "Invalid common header ID");
95-
kj::StringPtr name = COMMON_HEADER_NAMES[id];
96-
KJ_DASSERT(name != nullptr);
97-
return name;
98-
}
99-
100-
// Case-insensitive lookup of common header ID. This avoids allocating a lowercase copy
101-
// when the header is common. Returns kj::none if not a common header.
102-
// TODO(perf): It's possible to optimize this further with a good hash function but
103-
// for now a linear scan is sufficient.
104-
constexpr kj::Maybe<uint> getCommonHeaderId(kj::StringPtr name) {
105-
size_t len = name.size();
106-
if (len == 0) return kj::none;
107-
for (uint i = 1; i <= MAX_COMMON_HEADER_ID; ++i) {
108-
KJ_DASSERT(COMMON_HEADER_NAMES[i] != nullptr);
109-
// If the lengths don't match or the first character doesn't match, skip full comparison
110-
if (len != COMMON_HEADER_NAME_LENGTHS[i]) continue;
111-
if (strncasecmp(name.begin(), COMMON_HEADER_NAMES[i], len) == 0) {
112-
return i;
113-
}
104+
// Hashes `name` so that strings differing only in the case of ASCII letters produce
// the same value (each 'A'..'Z' byte is folded to lower case before it is mixed in).
//
// The mixing step is xor-then-multiply per byte with the constants 2166136261 and
// 16777619, i.e. the 32-bit FNV-1a offset basis and prime (assumes `uint` is 32 bits
// wide -- TODO confirm). An empty string hashes to the initial value.
//
// Diff-view note: the row marked `115-` inside this span (`return kj::none;`) is a
// removed line of the previous lookup function, not part of this function.
constexpr uint caseInsensitiveHash(kj::StringPtr name) {
105+
uint hash = 2166136261u;
106+
for (size_t i = 0; i < name.size(); ++i) {
107+
char c = name[i];
108+
if ('A' <= c && c <= 'Z') c += 32;
109+
// Go through uint8_t so the byte is mixed in as a value in 0..255.
hash ^= static_cast<uint8_t>(c);
110+
hash *= 16777619u;
114111
}
115-
return kj::none;
112+
return hash;
116113
}
117114

115+
// Number of slots in HeaderHashTable; hashes are reduced modulo this value.
// NOTE(review): insertion probes until it finds a free slot, so this presumably has to
// stay larger than MAX_COMMON_HEADER_ID -- confirm when adding common headers.
constexpr size_t HEADER_MAP_SIZE = 128;
116+
117+
// Constexpr hash table for case-insensitive mapping of header names to their
118+
// common header id (if any).
119+
// Fixed-size, open-addressed hash table (linear probing) from common header name to
// common header id. It is filled in the constexpr constructor from COMMON_HEADER_NAMES
// and queried case-insensitively through find().
//
// An id of 0 marks an unused slot and is also what find() returns for "no match".
struct HeaderHashTable final {
120+
// One slot of the table.
struct Entry {
121+
// Header name as stored in COMMON_HEADER_NAMES.
kj::StringPtr name;
122+
// Index of `name` in COMMON_HEADER_NAMES; 0 means the slot is empty.
uint id;
123+
};
124+
125+
Entry entries[HEADER_MAP_SIZE] = {};
126+
127+
// Builds the table: clears every slot, then inserts ids 1..MAX_COMMON_HEADER_ID.
constexpr HeaderHashTable() {
128+
// Start with every slot explicitly marked empty (id == 0).
for (size_t i = 0; i < HEADER_MAP_SIZE; ++i) {
129+
entries[i] = {nullptr, 0};
130+
}
131+
132+
for (uint i = 1; i <= MAX_COMMON_HEADER_ID; ++i) {
133+
auto name = COMMON_HEADER_NAMES[i];
134+
size_t slot = caseInsensitiveHash(name) % HEADER_MAP_SIZE;
135+
// Collision: step to the next slot, wrapping around at the end of the array.
// NOTE(review): this only terminates while a free slot exists, i.e. while fewer
// than HEADER_MAP_SIZE names have been inserted -- confirm that always holds.
while (entries[slot].id != 0) {
136+
slot = (slot + 1) % HEADER_MAP_SIZE;
137+
}
138+
entries[slot] = {name, i};
139+
}
140+
}
141+
142+
// Looks up `name` ignoring ASCII case. Returns the common header id, or 0 when the
// name is not in the table.
constexpr uint find(kj::StringPtr name) const {
143+
// NOTE(review): presumably true for an empty/null StringPtr -- confirm against kj.
if (name == nullptr) return 0;
144+
145+
size_t slot = caseInsensitiveHash(name) % HEADER_MAP_SIZE;
146+
147+
// Linear probe until we find a match or empty slot. The probe count is capped at
// HEADER_MAP_SIZE so the loop ends even if it never meets an empty slot.
for (size_t probes = 0; probes < HEADER_MAP_SIZE; ++probes) {
149+
const auto& entry = entries[slot];
150+
// An empty slot ends the probe sequence: the name was never inserted.
if (entry.id == 0) return 0;
151+
// The size comparison repeats the length check strcaseeq() performs itself.
if (entry.name.size() == name.size() && strcaseeq(entry.name, name)) {
152+
return entry.id;
153+
}
154+
slot = (slot + 1) % HEADER_MAP_SIZE;
155+
}
156+
return 0; // Not found
157+
}
158+
};
159+
160+
// The lookup table instance. It is a constexpr object, so the constructor's insertion
// loop is evaluated by the compiler (the static_asserts below depend on that).
constexpr HeaderHashTable HEADER_HASH_TABLE;
161+
// Quick check to verify that the hash table is constructed correctly.
// Both spellings must resolve to id 1, which also exercises the case-insensitive path.
162+
static_assert(HEADER_HASH_TABLE.find("accept-charset"_kj) == 1);
163+
static_assert(HEADER_HASH_TABLE.find("AcCePt-ChArSeT"_kj) == 1);
164+
118165
static_assert(std::size(COMMON_HEADER_NAMES) == (MAX_COMMON_HEADER_ID + 1));
119166

120167
void warnIfBadHeaderString(const jsg::ByteString& byteString) {
@@ -156,14 +203,15 @@ void warnIfBadHeaderString(const jsg::ByteString& byteString) {
156203
}
157204
}
158205

159-
// TODO(perf): This can be optimized further using a lookup table.
160-
constexpr bool isHttpWhitespace(char c) {
161-
return c == '\t' || c == '\r' || c == '\n' || c == ' ';
206+
// Rejects `value` with a TypeError ("Invalid header value.") unless
// workerd::util::isValidHeaderValue() accepts it. The validity rules themselves are not
// defined in this file (presumably in workerd/util/header-validation.h -- confirm).
inline constexpr void requireValidHeaderValue(kj::StringPtr value) {
207+
JSG_REQUIRE(workerd::util::isValidHeaderValue(value), TypeError, "Invalid header value.");
162208
}
163209

164-
// TODO(perf): This can be optimized further using a lookup table.
165-
constexpr bool isValidHeaderValueChar(char c) {
166-
return c != '\0' && c != '\r' && c != '\n';
210+
// Validates a header name: first passes it to warnIfBadHeaderString(), then rejects it
// with a TypeError ("Invalid header name.") at the first character for which
// util::isHttpTokenChar() is false. An empty name passes (the loop body never runs).
//
// NOTE(review): this function is declared constexpr but calls warnIfBadHeaderString(),
// which is declared as a plain `void` function in this file, so it cannot actually be
// evaluated in a constant expression -- consider whether `constexpr` is intended here.
inline constexpr void requireValidHeaderName(const jsg::ByteString& name) {
211+
warnIfBadHeaderString(name);
212+
for (char c: name) {
213+
JSG_REQUIRE(util::isHttpTokenChar(c), TypeError, "Invalid header name.");
214+
}
167215
}
168216

169217
// Left- and right-trim HTTP whitespace from `value`.
@@ -175,105 +223,33 @@ jsg::ByteString normalizeHeaderValue(jsg::Lock& js, jsg::ByteString value) {
175223
char* begin = value.begin();
176224
char* end = value.end();
177225

178-
while (begin < end && isHttpWhitespace(*begin)) ++begin;
179-
while (begin < end && isHttpWhitespace(*(end - 1))) --end;
226+
while (begin < end && util::isHttpWhitespace(*begin)) ++begin;
227+
while (begin < end && util::isHttpWhitespace(*(end - 1))) --end;
180228

181229
size_t newSize = end - begin;
182230
if (newSize == value.size()) return kj::mv(value);
183231

184232
return jsg::ByteString(kj::str(kj::ArrayPtr(begin, newSize)));
185233
}
186-
187-
// Fast lookup table for valid HTTP token characters (RFC 2616).
188-
// Valid token chars are: !#$%&'*+-.0-9A-Z^_`a-z|~
189-
// (i.e., any CHAR except CTLs or separators)
190-
static constexpr uint8_t HTTP_TOKEN_CHAR_TABLE[] = {
191-
// Control characters 0x00-0x1F and 0x7F are invalid
192-
0, 0, 0, 0, 0, 0, 0, 0, // 0x00-0x07
193-
0, 0, 0, 0, 0, 0, 0, 0, // 0x08-0x0F
194-
0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x17
195-
0, 0, 0, 0, 0, 0, 0, 0, // 0x18-0x1F
196-
0, 1, 0, 1, 1, 1, 1, 1, // 0x20-0x27: SP!"#$%&'
197-
0, 0, 1, 1, 0, 1, 1, 0, // 0x28-0x2F: ()*+,-./
198-
1, 1, 1, 1, 1, 1, 1, 1, // 0x30-0x37: 01234567
199-
1, 1, 0, 0, 0, 0, 0, 0, // 0x38-0x3F: 89:;<=>?
200-
0, 1, 1, 1, 1, 1, 1, 1, // 0x40-0x47: @ABCDEFG
201-
1, 1, 1, 1, 1, 1, 1, 1, // 0x48-0x4F: HIJKLMNO
202-
1, 1, 1, 1, 1, 1, 1, 1, // 0x50-0x57: PQRSTUVW
203-
1, 1, 1, 0, 0, 0, 1, 1, // 0x58-0x5F: XYZ[\]^_
204-
1, 1, 1, 1, 1, 1, 1, 1, // 0x60-0x67: `abcdefg
205-
1, 1, 1, 1, 1, 1, 1, 1, // 0x68-0x6F: hijklmno
206-
1, 1, 1, 1, 1, 1, 1, 1, // 0x70-0x77: pqrstuvw
207-
1, 1, 1, 0, 1, 0, 1, 0, // 0x78-0x7F: xyz{|}~DEL
208-
// Extended ASCII 0x80-0xFF are all invalid per RFC 2616
209-
0, 0, 0, 0, 0, 0, 0, 0, // 0x80-0x87
210-
0, 0, 0, 0, 0, 0, 0, 0, // 0x88-0x8F
211-
0, 0, 0, 0, 0, 0, 0, 0, // 0x90-0x97
212-
0, 0, 0, 0, 0, 0, 0, 0, // 0x98-0x9F
213-
0, 0, 0, 0, 0, 0, 0, 0, // 0xA0-0xA7
214-
0, 0, 0, 0, 0, 0, 0, 0, // 0xA8-0xAF
215-
0, 0, 0, 0, 0, 0, 0, 0, // 0xB0-0xB7
216-
0, 0, 0, 0, 0, 0, 0, 0, // 0xB8-0xBF
217-
0, 0, 0, 0, 0, 0, 0, 0, // 0xC0-0xC7
218-
0, 0, 0, 0, 0, 0, 0, 0, // 0xC8-0xCF
219-
0, 0, 0, 0, 0, 0, 0, 0, // 0xD0-0xD7
220-
0, 0, 0, 0, 0, 0, 0, 0, // 0xD8-0xDF
221-
0, 0, 0, 0, 0, 0, 0, 0, // 0xE0-0xE7
222-
0, 0, 0, 0, 0, 0, 0, 0, // 0xE8-0xEF
223-
0, 0, 0, 0, 0, 0, 0, 0, // 0xF0-0xF7
224-
0, 0, 0, 0, 0, 0, 0, 0, // 0xF8-0xFF
225-
};
226-
227-
inline void requireValidHeaderName(const jsg::ByteString& name) {
228-
// TODO(cleanup): Code duplication with kj/compat/http.c++
229-
warnIfBadHeaderString(name);
230-
231-
for (char c: name) {
232-
JSG_REQUIRE(HTTP_TOKEN_CHAR_TABLE[static_cast<uint8_t>(c)], TypeError, "Invalid header name.");
233-
}
234-
}
235-
236-
inline void requireValidHeaderValue(kj::StringPtr value) {
237-
for (char c: value) {
238-
JSG_REQUIRE(isValidHeaderValueChar(c), TypeError, "Invalid header value.");
239-
}
240-
}
241234
} // namespace
242235

243-
Headers::UncommonHeaderKey::UncommonHeaderKey(kj::String name)
244-
: name(kj::mv(name)),
245-
hash(kj::hashCode(this->name)) {}
246-
247-
Headers::UncommonHeaderKey::UncommonHeaderKey(kj::StringPtr name)
248-
: name(kj::str(name)),
249-
hash(kj::hashCode(this->name)) {}
250-
251-
bool Headers::UncommonHeaderKey::operator==(const UncommonHeaderKey& other) const {
252-
// The same hash code is a necessary but not sufficient condition for equality.
253-
return hash == other.hash && name == other.name;
254-
}
255-
256-
bool Headers::UncommonHeaderKey::operator==(kj::StringPtr otherName) const {
257-
if (name.size() != otherName.size()) return false;
258-
return strncasecmp(name.begin(), otherName.begin(), name.size()) == 0;
259-
}
260-
261236
// Builds the HeaderKey for `name`.
//
// If the case-insensitive table lookup recognises `name` as a common header, the key is
// that header's numeric id and nothing is allocated. Otherwise the key is the result of
// toLower(name) (presumably a freshly allocated lower-cased copy -- confirm).
//
// Diff-view note: rows marked `-` are the previous implementation; rows marked `+`
// replace them.
Headers::HeaderKey Headers::getHeaderKeyFor(kj::StringPtr name) {
262-
KJ_IF_SOME(commonId, getCommonHeaderId(name)) {
237+
// find() returns 0 for "not a common header", so the condition doubles as the
// found/not-found test.
if (uint commonId = HEADER_HASH_TABLE.find(name)) {
238+
KJ_DASSERT(commonId > 0 && commonId <= MAX_COMMON_HEADER_ID);
263239
return commonId;
264240
}
265241

266242
// Not a common header, so allocate lowercase copy for uncommon header
267-
return UncommonHeaderKey(toLower(name));
243+
return toLower(name);
268244
}
269245

270246
Headers::HeaderKey Headers::cloneHeaderKey(const HeaderKey& key) {
271247
KJ_SWITCH_ONEOF(key) {
272248
KJ_CASE_ONEOF(commonId, uint) {
273249
return commonId;
274250
}
275-
KJ_CASE_ONEOF(uncommonKey, UncommonHeaderKey) {
276-
return uncommonKey.clone();
251+
KJ_CASE_ONEOF(uncommonKey, kj::String) {
252+
return kj::str(uncommonKey);
277253
}
278254
}
279255
KJ_UNREACHABLE;
@@ -284,7 +260,7 @@ bool Headers::isSetCookie(const HeaderKey& key) {
284260
KJ_CASE_ONEOF(commonId, uint) {
285261
return commonId == static_cast<uint>(capnp::CommonHeaderName::SET_COOKIE);
286262
}
287-
KJ_CASE_ONEOF(uncommonKey, UncommonHeaderKey) {
263+
KJ_CASE_ONEOF(uncommonKey, kj::String) {
288264
// This case really shouldn't happen since "set-cookie" is a common header,
289265
// but just in case...
290266
return uncommonKey == "set-cookie";
@@ -294,21 +270,7 @@ bool Headers::isSetCookie(const HeaderKey& key) {
294270
}
295271

296272
// Returns whether two header keys are equal.
//
// Diff-view note: the rows marked `-` are the previous hand-written comparison (switch
// on the alternative held by `a`, then compare with the same alternative of `b`). The
// single `+` row replaces all of it with HeaderKey's own operator==.
// NOTE(review): presumably that operator compares both the active alternative and its
// value, matching the removed logic -- confirm against the HeaderKey type.
bool Headers::headerKeyEquals(const HeaderKey& a, const HeaderKey& b) {
297-
KJ_SWITCH_ONEOF(a) {
298-
KJ_CASE_ONEOF(aCommonId, uint) {
299-
KJ_IF_SOME(bCommonId, b.tryGet<uint>()) {
300-
return aCommonId == bCommonId;
301-
}
302-
return false;
303-
}
304-
KJ_CASE_ONEOF(aUncommonKey, UncommonHeaderKey) {
305-
KJ_IF_SOME(bUncommonKey, b.tryGet<UncommonHeaderKey>()) {
306-
return aUncommonKey == bUncommonKey;
307-
}
308-
return false;
309-
}
310-
}
311-
KJ_UNREACHABLE;
273+
return a == b;
312274
}
313275

314276
Headers::Header::Header(jsg::ByteString name, kj::Vector<jsg::ByteString> values)
@@ -354,8 +316,8 @@ kj::StringPtr Headers::Header::Header::getKeyName() const {
354316
KJ_CASE_ONEOF(commonId, uint) {
355317
return COMMON_HEADER_NAMES[commonId];
356318
}
357-
KJ_CASE_ONEOF(uncommonKey, UncommonHeaderKey) {
358-
return uncommonKey.getName();
319+
KJ_CASE_ONEOF(uncommonKey, kj::String) {
320+
return uncommonKey;
359321
}
360322
}
361323
KJ_UNREACHABLE;
@@ -394,15 +356,15 @@ kj::uint Headers::HeaderCallbacks::hashCode(const HeaderKey& key) {
394356
KJ_CASE_ONEOF(commonId, uint) {
395357
return kj::hashCode(commonId);
396358
}
397-
KJ_CASE_ONEOF(uncommonKey, UncommonHeaderKey) {
398-
return uncommonKey.hashCode();
359+
KJ_CASE_ONEOF(uncommonKey, kj::String) {
360+
return kj::hashCode(uncommonKey);
399361
}
400362
}
401363
KJ_UNREACHABLE;
402364
}
403365

404366
// Hash of a common header identified directly by its capnp::CommonHeaderName value.
//
// NOTE(review): the HeaderKey overload of hashCode() hashes a common id as
// kj::hashCode(<uint>). The `+` row here drops the explicit cast to uint that the `-`
// row had, so the enum value is now hashed as-is. Presumably both overloads must yield
// the same hash for the same header -- confirm that kj::hashCode() of the enum equals
// kj::hashCode() of the corresponding uint.
kj::uint Headers::HeaderCallbacks::hashCode(capnp::CommonHeaderName commondId) {
405-
return kj::hashCode(static_cast<uint>(commondId));
367+
return kj::hashCode(commondId);
406368
}
407369

408370
Headers::Headers(jsg::Lock& js, jsg::Dict<jsg::ByteString, jsg::ByteString> dict)
@@ -580,7 +542,7 @@ kj::ArrayPtr<jsg::ByteString> Headers::getSetCookie() {
580542
kj::ArrayPtr<jsg::ByteString> Headers::getAll(jsg::ByteString name) {
581543
requireValidHeaderName(name);
582544

583-
if (strcasecmp(name.cStr(), "set-cookie") != 0) {
545+
if (!strcaseeq(name, "set-cookie"_kj)) {
584546
JSG_FAIL_REQUIRE(TypeError, "getAll() can only be used with the header name \"Set-Cookie\".");
585547
}
586548

@@ -776,7 +738,7 @@ void Headers::serialize(jsg::Lock& js, jsg::Serializer& serializer) {
776738
KJ_CASE_ONEOF(commonId, uint) {
777739
serializer.writeRawUint32(commonId);
778740
}
779-
KJ_CASE_ONEOF(uncommonKey, UncommonHeaderKey) {
741+
KJ_CASE_ONEOF(_, kj::String) {
780742
serializer.writeRawUint32(0);
781743
serializer.writeLengthDelimited(header.getHeaderName());
782744
}

0 commit comments

Comments
 (0)