Skip to content

Commit 0a4c8f0

Browse files
committed
[Speculative] Exploring whether caching the uncommon header key...
... helps further improve performance at all. Unlikely to keep this unless we see a measurable gain.
1 parent 7c52fb0 commit 0a4c8f0

File tree

2 files changed

+124
-54
lines changed

2 files changed

+124
-54
lines changed

src/workerd/api/headers.c++

Lines changed: 97 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,42 @@ inline void requireValidHeaderValue(kj::StringPtr value) {
236236
}
237237
} // namespace
238238

239+
// Fixed-size cache from a header name to the HeaderKey produced by
// getHeaderKeyFor() for that name. Lookups are a linear scan over all slots
// using a case-insensitive comparison; on a miss a new entry is written into
// the next slot in round-robin order, replacing whatever that slot held.
struct Headers::HeaderKeyCache {
240+
// Keep the cache small; most headers are common anyway.
241+
static constexpr size_t CACHE_SIZE = 20;
242+
// One cache slot: the name the key was created from, plus the key itself.
struct Entry final {
243+
// Copy of the name passed to getOrCreateKey() on the miss that created this
// entry (original casing, not lower-cased here).
kj::String originalName;
244+
// Result of getHeaderKeyFor() for that name.
Headers::HeaderKey cachedKey;
245+
Entry(kj::String name, Headers::HeaderKey key)
246+
: originalName(kj::mv(name)),
247+
cachedKey(kj::mv(key)) {}
248+
};
249+
250+
// Slots start out empty and are filled as misses occur.
kj::FixedArray<kj::Maybe<Entry>, CACHE_SIZE> entries;
251+
// Index of the slot the next miss will (over)write; wraps at CACHE_SIZE.
size_t nextSlot = 0;
252+
253+
HeaderKeyCache(): entries() {}
254+
255+
// Returns the cached key whose stored name equals `name` ignoring case, or
// creates, stores and returns a new key via getHeaderKeyFor(name).
//
// NOTE(review): the returned reference points into `entries`. A later miss
// assigns over a slot (see below), so a reference obtained earlier may no
// longer refer to the same key after another getOrCreateKey() call --
// presumably callers must use or clone it immediately; confirm.
HeaderKey& getOrCreateKey(kj::StringPtr name) {
256+
for (auto& maybeEntry: entries) {
257+
KJ_IF_SOME(entry, maybeEntry) {
258+
// Lengths must match first: the comparison below is bounded by
// name.size(), so on its own it would also accept a longer stored name
// that merely starts with `name`.
if (entry.originalName.size() != name.size()) continue;
259+
if (strncasecmp(entry.originalName.begin(), name.begin(), name.size()) == 0) {
260+
return entry.cachedKey;
261+
}
262+
}
263+
}
264+
265+
// Not found: create a new entry in the next round-robin slot, replacing any
// entry already stored there.
266+
auto slot = nextSlot;
267+
nextSlot = (nextSlot + 1) % CACHE_SIZE;
268+
// Whatever getHeaderKeyFor() returns for this name is what gets cached.
entries[slot] = Entry(kj::str(name), getHeaderKeyFor(name));
269+
return KJ_ASSERT_NONNULL(entries[slot]).cachedKey;
270+
}
271+
};
272+
273+
// One cache instance per thread.
thread_local Headers::HeaderKeyCache headerKeyCache{};
274+
239275
// Takes ownership of `name` and stores kj::hashCode() of it in `hash`.
// The hash is computed from `this->name` because the `name` parameter has
// already been moved into the member by that point.
Headers::UncommonHeaderKey::UncommonHeaderKey(kj::String name)
240276
: name(kj::mv(name)),
241277
hash(kj::hashCode(this->name)) {}
@@ -262,16 +298,16 @@ Headers::HeaderKey Headers::getHeaderKeyFor(kj::StringPtr name) {
262298
}
263299

264300
// Not a common header, so allocate lowercase copy for uncommon header
265-
return UncommonHeaderKey(toLower(name));
301+
return kj::rc<UncommonHeaderKey>(toLower(name));
266302
}
267303

268-
Headers::HeaderKey Headers::cloneHeaderKey(const HeaderKey& key) {
304+
Headers::HeaderKey Headers::cloneHeaderKey(HeaderKey& key) {
269305
KJ_SWITCH_ONEOF(key) {
270306
KJ_CASE_ONEOF(commonId, uint) {
271307
return commonId;
272308
}
273-
KJ_CASE_ONEOF(uncommonKey, UncommonHeaderKey) {
274-
return uncommonKey.clone();
309+
KJ_CASE_ONEOF(uncommonKey, kj::Rc<UncommonHeaderKey>) {
310+
return uncommonKey.addRef();
275311
}
276312
}
277313
KJ_UNREACHABLE;
@@ -282,10 +318,10 @@ bool Headers::isSetCookie(const HeaderKey& key) {
282318
KJ_CASE_ONEOF(commonId, uint) {
283319
return commonId == static_cast<uint>(capnp::CommonHeaderName::SET_COOKIE);
284320
}
285-
KJ_CASE_ONEOF(uncommonKey, UncommonHeaderKey) {
321+
KJ_CASE_ONEOF(uncommonKey, kj::Rc<UncommonHeaderKey>) {
286322
// This case really shouldn't happen since "set-cookie" is a common header,
287323
// but just in case...
288-
return uncommonKey.equals("set-cookie");
324+
return uncommonKey->equals("set-cookie");
289325
}
290326
}
291327
KJ_UNREACHABLE;
@@ -299,9 +335,9 @@ bool Headers::headerKeyEquals(const HeaderKey& a, const HeaderKey& b) {
299335
}
300336
return false;
301337
}
302-
KJ_CASE_ONEOF(aUncommonKey, UncommonHeaderKey) {
303-
KJ_IF_SOME(bUncommonKey, b.tryGet<UncommonHeaderKey>()) {
304-
return aUncommonKey.equals(bUncommonKey);
338+
KJ_CASE_ONEOF(aUncommonKey, kj::Rc<UncommonHeaderKey>) {
339+
KJ_IF_SOME(bUncommonKey, b.tryGet<kj::Rc<UncommonHeaderKey>>()) {
340+
return aUncommonKey->equals(*bUncommonKey.get());
305341
}
306342
return false;
307343
}
@@ -310,7 +346,7 @@ bool Headers::headerKeyEquals(const HeaderKey& a, const HeaderKey& b) {
310346
}
311347

312348
Headers::Header::Header(jsg::ByteString name, kj::Vector<jsg::ByteString> values)
313-
: key(getHeaderKeyFor(name)),
349+
: key(cloneHeaderKey(headerKeyCache.getOrCreateKey(name))),
314350
values(kj::mv(values)) {
315351
if (getKeyName() != name) {
316352
// The casing of the provided name does not match the lower-cased version
@@ -321,7 +357,7 @@ Headers::Header::Header(jsg::ByteString name, kj::Vector<jsg::ByteString> values
321357
}
322358

323359
Headers::Header::Header(jsg::ByteString name, jsg::ByteString value)
324-
: key(getHeaderKeyFor(name)),
360+
: key(cloneHeaderKey(headerKeyCache.getOrCreateKey(name))),
325361
values(1) {
326362
values.add(kj::mv(value));
327363
if (getKeyName() != name) {
@@ -352,8 +388,8 @@ kj::StringPtr Headers::Header::Header::getKeyName() const {
352388
KJ_CASE_ONEOF(commonId, uint) {
353389
return COMMON_HEADER_NAMES[commonId];
354390
}
355-
KJ_CASE_ONEOF(uncommonKey, UncommonHeaderKey) {
356-
return uncommonKey.getName();
391+
KJ_CASE_ONEOF(uncommonKey, kj::Rc<UncommonHeaderKey>) {
392+
return uncommonKey->getName();
357393
}
358394
}
359395
KJ_UNREACHABLE;
@@ -366,7 +402,7 @@ kj::StringPtr Headers::Header::getHeaderName() const {
366402
return getKeyName();
367403
}
368404

369-
Headers::Header Headers::Header::clone() const {
405+
Headers::Header Headers::Header::clone() {
370406
return Header(cloneHeaderKey(key),
371407
name.map([](const kj::String& n) { return jsg::ByteString(kj::str(n)); }),
372408
KJ_MAP(value, values) { return jsg::ByteString(kj::str(value)); });
@@ -377,7 +413,7 @@ bool Headers::HeaderCallbacks::matches(Header& header, const HeaderKey& other) {
377413
}
378414

379415
bool Headers::HeaderCallbacks::matches(Header& header, kj::StringPtr otherName) {
380-
return matches(header, getHeaderKeyFor(otherName));
416+
return matches(header, headerKeyCache.getOrCreateKey(otherName));
381417
}
382418

383419
bool Headers::HeaderCallbacks::matches(Header& header, capnp::CommonHeaderName commondId) {
@@ -392,8 +428,8 @@ kj::uint Headers::HeaderCallbacks::hashCode(const HeaderKey& key) {
392428
KJ_CASE_ONEOF(commonId, uint) {
393429
return kj::hashCode(commonId);
394430
}
395-
KJ_CASE_ONEOF(uncommonKey, UncommonHeaderKey) {
396-
return uncommonKey.hashCode();
431+
KJ_CASE_ONEOF(uncommonKey, kj::Rc<UncommonHeaderKey>) {
432+
return uncommonKey->hashCode();
397433
}
398434
}
399435
KJ_UNREACHABLE;
@@ -405,14 +441,19 @@ kj::uint Headers::HeaderCallbacks::hashCode(capnp::CommonHeaderName commondId) {
405441

406442
Headers::Headers(jsg::Lock& js, jsg::Dict<jsg::ByteString, jsg::ByteString> dict)
407443
: guard(Guard::NONE) {
408-
headers.reserve(dict.fields.size());
444+
// Reserve enough space for all headers + 50% extra to avoid extraneous rehashes.
445+
// In the typical case, users won't be adding a large number of additional
446+
// headers after construction but just in case, we add some extra slack.
447+
size_t size = dict.fields.size();
448+
headers.reserve(kj::max(10, size + (size >> 1)));
409449
for (auto& field: dict.fields) {
410450
append(js, kj::mv(field.name), kj::mv(field.value));
411451
}
412452
}
413453

414-
Headers::Headers(jsg::Lock& js, const Headers& other): guard(Guard::NONE) {
415-
headers.reserve(other.headers.size());
454+
Headers::Headers(jsg::Lock& js, Headers& other): guard(Guard::NONE) {
455+
size_t size = other.headers.size();
456+
headers.reserve(kj::max(10, size + (size >> 1)));
416457
for (auto& header: other.headers) {
417458
// There really shouldn't be any duplicate headers in other, but just in case, use upsert
418459
// and we'll just ignore duplicates.
@@ -421,18 +462,44 @@ Headers::Headers(jsg::Lock& js, const Headers& other): guard(Guard::NONE) {
421462
}
422463

423464
Headers::Headers(jsg::Lock& js, const kj::HttpHeaders& other, Guard guard): guard(Guard::NONE) {
424-
headers.reserve(other.size());
425-
other.forEach([this, &js](auto name, auto value) {
465+
size_t size = other.size();
466+
headers.reserve(kj::max(10, size + (size >> 1)));
467+
468+
other.forEach([this](auto name, auto value) {
426469
// We have to copy the strings here but we can avoid normalizing and validating since
427470
// they presumably already went through that process when they were added to the
428471
// kj::HttpHeader instance.
429-
appendUnguarded(js, jsg::ByteString(kj::str(name)), jsg::ByteString(kj::str(value)));
472+
auto& key = headerKeyCache.getOrCreateKey(name);
473+
474+
auto valueStr = jsg::ByteString(kj::str(value));
475+
KJ_IF_SOME(found, headers.find(key)) {
476+
found.values.add(kj::mv(valueStr));
477+
} else {
478+
kj::Maybe<jsg::ByteString> preservedName = kj::none;
479+
auto nameStr = ([&]() -> kj::StringPtr {
480+
KJ_SWITCH_ONEOF(key) {
481+
KJ_CASE_ONEOF(commonId, uint) {
482+
return COMMON_HEADER_NAMES[commonId];
483+
}
484+
KJ_CASE_ONEOF(uncommonKey, kj::Rc<UncommonHeaderKey>) {
485+
return uncommonKey->getName();
486+
}
487+
}
488+
KJ_UNREACHABLE;
489+
})();
490+
if (nameStr != name) {
491+
preservedName = jsg::ByteString(kj::str(name));
492+
}
493+
kj::Vector<jsg::ByteString> values(1);
494+
values.add(kj::mv(valueStr));
495+
headers.insert(Header(cloneHeaderKey(key), kj::mv(preservedName), kj::mv(values)));
496+
}
430497
});
431498

432499
this->guard = guard;
433500
}
434501

435-
jsg::Ref<Headers> Headers::clone(jsg::Lock& js) const {
502+
jsg::Ref<Headers> Headers::clone(jsg::Lock& js) {
436503
auto result = js.alloc<Headers>(js, *this);
437504
result->guard = guard;
438505
return kj::mv(result);
@@ -454,7 +521,7 @@ bool Headers::hasLowerCase(kj::StringPtr name) {
454521
KJ_DREQUIRE(!('A' <= c && c <= 'Z'));
455522
}
456523
#endif
457-
return headers.find(getHeaderKeyFor(name)) != kj::none;
524+
return headers.find(headerKeyCache.getOrCreateKey(name)) != kj::none;
458525
}
459526

460527
kj::Array<Headers::DisplayedHeader> Headers::getDisplayedHeaders(jsg::Lock& js) {
@@ -554,7 +621,7 @@ kj::Maybe<jsg::ByteString> Headers::get(jsg::Lock& js, jsg::ByteString name) {
554621
}
555622

556623
kj::Maybe<jsg::ByteString> Headers::getNoChecks(jsg::Lock&, kj::StringPtr name) {
557-
KJ_IF_SOME(found, headers.find(getHeaderKeyFor(name))) {
624+
KJ_IF_SOME(found, headers.find(headerKeyCache.getOrCreateKey(name))) {
558625
return jsg::ByteString(kj::strArray(found.values, ", "));
559626
}
560627
return kj::none;
@@ -590,7 +657,7 @@ kj::ArrayPtr<jsg::ByteString> Headers::getAll(jsg::ByteString name) {
590657

591658
bool Headers::has(jsg::ByteString name) {
592659
requireValidHeaderName(name);
593-
return headers.find(getHeaderKeyFor(name)) != kj::none;
660+
return headers.find(headerKeyCache.getOrCreateKey(name)) != kj::none;
594661
}
595662

596663
bool Headers::hasCommon(capnp::CommonHeaderName idx) {
@@ -631,7 +698,7 @@ void Headers::append(jsg::Lock& js, jsg::ByteString name, jsg::ByteString value)
631698

632699
void Headers::appendUnguarded(jsg::Lock& js, jsg::ByteString name, jsg::ByteString value) {
633700
// If the header already exists, we just add to its values.
634-
auto key = getHeaderKeyFor(name);
701+
auto& key = headerKeyCache.getOrCreateKey(name);
635702
KJ_IF_SOME(found, headers.find(key)) {
636703
found.values.add(kj::mv(value));
637704
} else {
@@ -642,7 +709,7 @@ void Headers::appendUnguarded(jsg::Lock& js, jsg::ByteString name, jsg::ByteStri
642709
void Headers::delete_(jsg::ByteString name) {
643710
checkGuard();
644711
requireValidHeaderName(name);
645-
headers.eraseMatch(getHeaderKeyFor(name));
712+
headers.eraseMatch(headerKeyCache.getOrCreateKey(name));
646713
}
647714

648715
// There are a couple implementation details of the Headers iterators worth calling out.
@@ -774,7 +841,7 @@ void Headers::serialize(jsg::Lock& js, jsg::Serializer& serializer) {
774841
KJ_CASE_ONEOF(commonId, uint) {
775842
serializer.writeRawUint32(commonId);
776843
}
777-
KJ_CASE_ONEOF(uncommonKey, UncommonHeaderKey) {
844+
KJ_CASE_ONEOF(_, kj::Rc<UncommonHeaderKey>) {
778845
serializer.writeRawUint32(0);
779846
serializer.writeLengthDelimited(header.getHeaderName());
780847
}

src/workerd/api/headers.h

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,17 @@ class Headers final: public jsg::Object {
3232
jsg::ByteString value; // comma-concatenation of all values seen
3333
};
3434

35-
Headers(): guard(Guard::NONE) {}
35+
Headers(): guard(Guard::NONE) {
36+
// Pre-allocate some space for headers to avoid rehashes in the common cases.
37+
headers.reserve(10);
38+
}
3639
explicit Headers(jsg::Lock& js, jsg::Dict<jsg::ByteString, jsg::ByteString> dict);
37-
explicit Headers(jsg::Lock& js, const Headers& other);
40+
explicit Headers(jsg::Lock& js, Headers& other);
3841
explicit Headers(jsg::Lock& js, const kj::HttpHeaders& other, Guard guard);
3942
KJ_DISALLOW_COPY_AND_MOVE(Headers);
4043

4144
// Make a copy of this Headers object, and preserve the guard.
42-
jsg::Ref<Headers> clone(jsg::Lock& js) const;
45+
jsg::Ref<Headers> clone(jsg::Lock& js);
4346

4447
// Fill in the given HttpHeaders with these headers. Note that strings are inserted by
4548
// reference, so the output must be consumed immediately.
@@ -164,13 +167,13 @@ class Headers final: public jsg::Object {
164167

165168
void visitForMemoryInfo(jsg::MemoryTracker& tracker) const;
166169

170+
struct HeaderKeyCache;
171+
167172
private:
168173
// The header key can be stored either as a common header ID (uint)
169-
class UncommonHeaderKey final {
174+
class UncommonHeaderKey final: public kj::Refcounted {
170175
public:
171-
UncommonHeaderKey(UncommonHeaderKey&&) = default;
172-
UncommonHeaderKey& operator=(UncommonHeaderKey&&) = default;
173-
KJ_DISALLOW_COPY(UncommonHeaderKey);
176+
KJ_DISALLOW_COPY_AND_MOVE(UncommonHeaderKey);
174177

175178
bool equals(const UncommonHeaderKey& other) const;
176179

@@ -186,38 +189,35 @@ class Headers final: public jsg::Object {
186189
tracker.trackField("name", name);
187190
}
188191

189-
UncommonHeaderKey clone() const {
190-
return UncommonHeaderKey(kj::str(name), hash);
191-
}
192+
UncommonHeaderKey(kj::String name);
193+
UncommonHeaderKey(kj::StringPtr name);
194+
UncommonHeaderKey(kj::String name, kj::uint hash): name(kj::mv(name)), hash(hash) {}
192195

193196
private:
194197
// The name is expected to be stored in lower-case form, but we do not
195-
// enforce that actually within the struct. It is the responsibility of
198+
// enforce that actually within this class. It is the responsibility of
196199
// the caller to ensure it is appropriately lower-cased.
197200
kj::String name;
198201
kj::uint hash;
199202

200203
// Critically, because of hash collisions, we must still compare the full string
201204
// to determine equality but we can use the hash code to avoid unnecessary string
202205
// comparisons.
203-
UncommonHeaderKey(kj::String name);
204-
UncommonHeaderKey(kj::StringPtr name);
205-
UncommonHeaderKey(kj::String name, kj::uint hash): name(kj::mv(name)), hash(hash) {}
206206
friend class Headers;
207207
};
208208

209-
// A header is identified by either a common header ID or an uncommon header name.
210-
// The header key name is always identifed in lower-case form, while the original
209+
// A header is identified by either a common header ID or an UncommonHeaderKey.
210+
// The header key name is always stored in lower-case form, while the original
211211
// casing is preserved in the actual Header struct to support case-preserving display.
212-
using HeaderKey = kj::OneOf<uint, UncommonHeaderKey>;
212+
using HeaderKey = kj::OneOf<uint, kj::Rc<UncommonHeaderKey>>;
213213

214-
static HeaderKey getHeaderKeyFor(kj::StringPtr name);
215-
static bool headerKeyEquals(const HeaderKey& a, const HeaderKey& b);
216-
static HeaderKey cloneHeaderKey(const HeaderKey& key);
217-
static bool isSetCookie(const HeaderKey& key);
214+
static inline HeaderKey getHeaderKeyFor(kj::StringPtr name);
215+
static inline bool headerKeyEquals(const HeaderKey& a, const HeaderKey& b);
216+
static inline HeaderKey cloneHeaderKey(HeaderKey& key);
217+
static inline bool isSetCookie(const HeaderKey& key);
218218

219219
struct Header final {
220-
// The header key, either a common header ID or an uncommon header name.
220+
// The header key, either a common header ID or an UncommonHeaderKey.
221221
HeaderKey key;
222222
// If the casing of the header name does not match the lower-cased version, we
223223
// store the original casing here for display purposes. If the casing matches, this
@@ -248,10 +248,12 @@ class Headers final: public jsg::Object {
248248
kj::Vector<jsg::ByteString> values);
249249
explicit Header(HeaderKey key, jsg::ByteString name, jsg::ByteString value);
250250

251-
Header clone() const;
251+
Header clone();
252252

253253
JSG_MEMORY_INFO(Header) {
254-
tracker.trackField("key", key.tryGet<UncommonHeaderKey>());
254+
KJ_IF_SOME(uncommonKey, key.tryGet<kj::Rc<UncommonHeaderKey>>()) {
255+
tracker.trackFieldWithSize("key", uncommonKey->getName().size());
256+
}
255257
tracker.trackField("name", name);
256258
for (const auto& value : values) {
257259
tracker.trackField(nullptr, value);
@@ -268,6 +270,7 @@ class Headers final: public jsg::Object {
268270
static kj::uint hashCode(const HeaderKey& key);
269271
static kj::uint hashCode(capnp::CommonHeaderName commondId);
270272
};
273+
friend struct HeaderKeyCache;
271274

272275
kj::Table<Header, kj::HashIndex<HeaderCallbacks>> headers;
273276

0 commit comments

Comments
 (0)