Skip to content

Commit 0eb6447

Browse files
committed
Remove jsg::ByteString
The type is largely obsolete and was being used only to warn about invalid header names/values in Headers. However, it incurred an additional overhead when being created. This was especially non-sensical for header names since we already validate those to be proper HTTP token strings. Removing the type simplifies code and helps boost overall performance.
1 parent 2b37ff6 commit 0eb6447

23 files changed

+125
-180
lines changed

src/workerd/api/cache.c++

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,7 @@ kj::Own<kj::HttpClient> Cache::getHttpClient(IoContext& context,
532532
kj::Maybe<kj::String> cfBlobJson,
533533
kj::LiteralStringConst operationName,
534534
kj::StringPtr url,
535-
kj::Maybe<jsg::ByteString> cacheControl,
535+
kj::Maybe<kj::String> cacheControl,
536536
bool enableCompatFlags) {
537537
auto span = context.makeTraceSpan(operationName);
538538
auto userSpan = context.makeUserTraceSpan(operationName);

src/workerd/api/cache.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ class Cache: public jsg::Object {
9393
kj::Maybe<kj::String> cfBlobJson,
9494
kj::LiteralStringConst operationName,
9595
kj::StringPtr url,
96-
kj::Maybe<jsg::ByteString> cacheControl,
96+
kj::Maybe<kj::String> cacheControl,
9797
bool enableCompatFlags);
9898
};
9999

src/workerd/api/eventsource.c++

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -362,8 +362,6 @@ void EventSource::start(jsg::Lock& js) {
362362
js, self, kj::str("The response status code was ", response->getStatus(), "."));
363363
}
364364

365-
// TODO(cleanup): Using jsg::ByteString here is really annoying. It would be nice to have
366-
// an internal alternative that doesn't require an allocation.
367365
KJ_IF_SOME(contentType,
368366
response->getHeaders(js)->getCommon(js, capnp::CommonHeaderName::CONTENT_TYPE)) {
369367
bool invalid = false;

src/workerd/api/headers.c++

Lines changed: 56 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -164,15 +164,39 @@ static_assert(HEADER_HASH_TABLE.find("AcCePt-ChArSeT"_kj) == 1);
164164

165165
static_assert(std::size(COMMON_HEADER_NAMES) == (MAX_COMMON_HEADER_ID + 1));
166166

167-
inline constexpr void requireValidHeaderName(const jsg::ByteString& name) {
167+
inline constexpr void requireValidHeaderName(kj::StringPtr name) {
168+
if (HEADER_HASH_TABLE.find(name) != 0) {
169+
// Known common header, always valid
170+
return;
171+
}
168172
for (char c: name) {
169173
JSG_REQUIRE(util::isHttpTokenChar(c), TypeError, "Invalid header name.");
170174
}
171175
}
172176

177+
void maybeWarnIfBadHeaderString(kj::StringPtr str) {
178+
if (IoContext::hasCurrent()) {
179+
auto& context = IoContext::current();
180+
if (context.isInspectorEnabled()) {
181+
if (!simdutf::validate_ascii(str.begin(), str.size())) {
182+
// The string contains non-ASCII characters. While any 8-bit value is technically valid
183+
// in HTTP headers, we encode header strings as UTF-8, so we want to warn the user that
184+
// their header name/value may not be what they may expect based on what browsers do.
185+
auto utf8Hex =
186+
kj::strArray(KJ_MAP(b, str) { return kj::str("\\x", kj::hex(kj::byte(b))); }, "");
187+
context.logWarning(kj::str("A header value contains non-ASCII characters: \"", str,
188+
"\" (raw bytes: \"", utf8Hex,
189+
"\"). As a quirk to support Unicode, we are encoding "
190+
"values as UTF-8 in the header, but in a browser this would likely result in a "
191+
"TypeError exception. Consider encoding this string in ASCII for compatibility with "
192+
"browser implementations of the Fetch specification."));
193+
}
194+
}
195+
}
196+
}
197+
173198
// Left- and right-trim HTTP whitespace from `value`.
174-
kj::String normalizeHeaderValue(jsg::Lock& js, jsg::ByteString value) {
175-
JSG_REQUIRE(workerd::util::isValidHeaderValue(value), TypeError, "Invalid header value.");
199+
kj::String normalizeHeaderValue(kj::String value) {
176200
// Fast path: if empty, return as-is
177201
if (value.size() == 0) return kj::mv(value);
178202

@@ -183,9 +207,18 @@ kj::String normalizeHeaderValue(jsg::Lock& js, jsg::ByteString value) {
183207
while (begin < end && util::isHttpWhitespace(*(end - 1))) --end;
184208

185209
size_t newSize = end - begin;
186-
if (newSize == value.size()) return kj::mv(value);
210+
if (newSize == value.size()) {
211+
JSG_REQUIRE(workerd::util::isValidHeaderValue(value), TypeError, "Invalid header value.");
212+
maybeWarnIfBadHeaderString(value);
213+
return kj::mv(value);
214+
}
187215

188-
return kj::str(kj::ArrayPtr(begin, newSize));
216+
auto trimmed = kj::ArrayPtr(begin, newSize);
217+
JSG_REQUIRE(workerd::util::isValidHeaderValue(trimmed), TypeError, "Invalid header value.");
218+
maybeWarnIfBadHeaderString(value);
219+
// By attaching the original array to the trimmed view, we keep the original allocation alive
220+
// and prevent an unnecessary copy.
221+
return kj::str(trimmed.attach(value.releaseArray()));
189222
}
190223

191224
constexpr bool isSetCookie(const Headers::HeaderKey& key) {
@@ -285,8 +318,7 @@ kj::uint Headers::HeaderCallbacks::hashCode(capnp::CommonHeaderName commondId) {
285318
return kj::hashCode(commondId);
286319
}
287320

288-
Headers::Headers(jsg::Lock& js, jsg::Dict<jsg::ByteString, jsg::ByteString> dict)
289-
: guard(Guard::NONE) {
321+
Headers::Headers(jsg::Lock& js, jsg::Dict<kj::String, kj::String> dict): guard(Guard::NONE) {
290322
headers.reserve(dict.fields.size());
291323
for (auto& field: dict.fields) {
292324
append(js, kj::mv(field.name), kj::mv(field.value));
@@ -386,7 +418,7 @@ kj::Array<Headers::DisplayedHeader> Headers::getDisplayedHeaders(jsg::Lock& js)
386418
}
387419

388420
jsg::Ref<Headers> Headers::constructor(jsg::Lock& js, jsg::Optional<Initializer> init) {
389-
using StringDict = jsg::Dict<jsg::ByteString, jsg::ByteString>;
421+
using StringDict = jsg::Dict<kj::String, kj::String>;
390422

391423
KJ_IF_SOME(i, init) {
392424
KJ_SWITCH_ONEOF(kj::mv(i)) {
@@ -398,7 +430,7 @@ jsg::Ref<Headers> Headers::constructor(jsg::Lock& js, jsg::Optional<Initializer>
398430
// It's important to note here that we are treating the Headers object
399431
// as a special case here. Per the fetch spec, we *should* be grabbing
400432
// the Symbol.iterator off the Headers object and interpreting it as
401-
// a Sequence<Sequence<ByteString>> (as in the ByteStringPairs case
433+
// a Sequence<Sequence<kj::String>> (as in the StringPairs case
402434
// below). However, special casing Headers like we do here is more
403435
// performant and has other side effects such as preserving the casing
404436
// of header names that have been received.
@@ -415,7 +447,7 @@ jsg::Ref<Headers> Headers::constructor(jsg::Lock& js, jsg::Optional<Initializer>
415447
// implementation here, however, we are ignoring the Symbol.iterator so
416448
// the test fails.
417449
}
418-
KJ_CASE_ONEOF(pairs, ByteStringPairs) {
450+
KJ_CASE_ONEOF(pairs, StringPairs) {
419451
auto dict = KJ_MAP(entry, pairs) {
420452
JSG_REQUIRE(entry.size() == 2, TypeError,
421453
"To initialize a Headers object from a sequence, each inner sequence "
@@ -430,7 +462,7 @@ jsg::Ref<Headers> Headers::constructor(jsg::Lock& js, jsg::Optional<Initializer>
430462
return js.alloc<Headers>();
431463
}
432464

433-
kj::Maybe<kj::String> Headers::get(jsg::Lock& js, jsg::ByteString name) {
465+
kj::Maybe<kj::String> Headers::get(jsg::Lock& js, kj::String name) {
434466
requireValidHeaderName(name);
435467
return getUnguarded(js, name.asPtr());
436468
}
@@ -457,7 +489,7 @@ kj::Array<kj::StringPtr> Headers::getSetCookie() {
457489
return nullptr;
458490
}
459491

460-
kj::Array<kj::StringPtr> Headers::getAll(jsg::ByteString name) {
492+
kj::Array<kj::StringPtr> Headers::getAll(kj::String name) {
461493
requireValidHeaderName(name);
462494

463495
if (!strcaseeq(name, "set-cookie"_kj)) {
@@ -470,7 +502,7 @@ kj::Array<kj::StringPtr> Headers::getAll(jsg::ByteString name) {
470502
return getSetCookie();
471503
}
472504

473-
bool Headers::has(jsg::ByteString name) {
505+
bool Headers::has(kj::String name) {
474506
requireValidHeaderName(name);
475507
return headers.find(getHeaderKeyFor(name)) != kj::none;
476508
}
@@ -480,23 +512,24 @@ bool Headers::hasCommon(capnp::CommonHeaderName idx) {
480512
return headers.find(idx) != kj::none;
481513
}
482514

483-
void Headers::set(jsg::Lock& js, jsg::ByteString name, jsg::ByteString value) {
515+
void Headers::set(jsg::Lock& js, kj::String name, kj::String value) {
484516
checkGuard();
485517
requireValidHeaderName(name);
486-
setUnguarded(js, kj::mv(name), normalizeHeaderValue(js, kj::mv(value)));
518+
setUnguarded(js, kj::mv(name), normalizeHeaderValue(kj::mv(value)));
487519
}
488520

489521
void Headers::setUnguarded(jsg::Lock& js, kj::String name, kj::String value) {
490522
auto key = getHeaderKeyFor(name);
491523
auto& header = headers.findOrCreate(key, [&]() {
492524
Header header(kj::mv(key));
493-
if (header.getHeaderName() != name) {
525+
auto keyName = header.getKeyName();
526+
if (keyName.size() != name.size() || keyName != name) {
494527
header.name = kj::mv(name);
495528
}
496529
return kj::mv(header);
497530
});
498-
header.values.clear();
499-
header.values.add(kj::mv(value));
531+
header.values.resize(1);
532+
header.values[0] = kj::mv(value);
500533
}
501534

502535
void Headers::setCommon(capnp::CommonHeaderName idx, kj::String value) {
@@ -507,25 +540,26 @@ void Headers::setCommon(capnp::CommonHeaderName idx, kj::String value) {
507540
header.values.add(kj::mv(value));
508541
}
509542

510-
void Headers::append(jsg::Lock& js, jsg::ByteString name, jsg::ByteString value) {
543+
void Headers::append(jsg::Lock& js, kj::String name, kj::String value) {
511544
checkGuard();
512545
requireValidHeaderName(name);
513-
appendUnguarded(js, kj::mv(name), normalizeHeaderValue(js, kj::mv(value)));
546+
appendUnguarded(js, kj::mv(name), normalizeHeaderValue(kj::mv(value)));
514547
}
515548

516549
void Headers::appendUnguarded(jsg::Lock& js, kj::String name, kj::String value) {
517550
auto key = getHeaderKeyFor(name);
518551
auto& header = headers.findOrCreate(key, [&]() {
519552
Header header(kj::mv(key));
520-
if (header.getHeaderName() != name) {
553+
auto keyName = header.getKeyName();
554+
if (keyName.size() != name.size() || keyName != name) {
521555
header.name = kj::mv(name);
522556
}
523557
return kj::mv(header);
524558
});
525559
header.values.add(kj::mv(value));
526560
}
527561

528-
void Headers::delete_(jsg::ByteString name) {
562+
void Headers::delete_(kj::String name) {
529563
checkGuard();
530564
requireValidHeaderName(name);
531565
headers.eraseMatch(getHeaderKeyFor(name));

src/workerd/api/headers.h

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class Headers final: public jsg::Object {
3232
};
3333

3434
Headers(): guard(Guard::NONE) {}
35-
explicit Headers(jsg::Lock& js, jsg::Dict<jsg::ByteString, jsg::ByteString> dict);
35+
explicit Headers(jsg::Lock& js, jsg::Dict<kj::String, kj::String> dict);
3636
explicit Headers(jsg::Lock& js, const Headers& other);
3737
explicit Headers(jsg::Lock& js, const kj::HttpHeaders& other, Guard guard);
3838
KJ_DISALLOW_COPY_AND_MOVE(Headers);
@@ -51,42 +51,42 @@ class Headers final: public jsg::Object {
5151
// Returns headers with lower-case name and comma-concatenated duplicates.
5252
kj::Array<DisplayedHeader> getDisplayedHeaders(jsg::Lock& js);
5353

54-
using ByteStringPair = jsg::Sequence<jsg::ByteString>;
55-
using ByteStringPairs = jsg::Sequence<ByteStringPair>;
54+
using StringPair = jsg::Sequence<kj::String>;
55+
using StringPairs = jsg::Sequence<StringPair>;
5656

5757
// Per the fetch specification, it is possible to initialize a Headers object
5858
// from any other object that has a Symbol.iterator implementation. Those are
59-
// handled in this Initializer definition using the ByteStringPairs definition
60-
// that aliases jsg::Sequence<jsg::Sequence<jsg::ByteString>>. Technically,
59+
// handled in this Initializer definition using the StringPairs definition
60+
// that aliases jsg::Sequence<jsg::Sequence<kj::String>>. Technically,
6161
// the Headers object itself falls under that definition as well. However, treating
6262
// a Headers object as a jsg::Sequence<jsg::Sequence<T>> is nowhere near as
6363
// performant and has the side effect of forcing all header names to be lower-cased
6464
// rather than case-preserved. Instead of following the spec exactly here, we
6565
// choose to special case creating a Header object from another Header object.
6666
// This is an intentional departure from the spec.
6767
using Initializer = kj::OneOf<jsg::Ref<Headers>,
68-
ByteStringPairs,
69-
jsg::Dict<jsg::ByteString, jsg::ByteString>>;
68+
StringPairs,
69+
jsg::Dict<kj::String, kj::String>>;
7070

7171
static jsg::Ref<Headers> constructor(jsg::Lock& js, jsg::Optional<Initializer> init);
72-
kj::Maybe<kj::String> get(jsg::Lock& js, jsg::ByteString name);
72+
kj::Maybe<kj::String> get(jsg::Lock& js, kj::String name);
7373

7474
kj::Maybe<kj::String> getUnguarded(jsg::Lock& js, kj::StringPtr name);
7575

7676
// getAll is a legacy non-standard extension API that we introduced before
7777
// getSetCookie() was defined. We continue to support it for backwards
7878
// compatibility but users really ought to be using getSetCookie() now.
79-
kj::Array<kj::StringPtr> getAll(jsg::ByteString name);
79+
kj::Array<kj::StringPtr> getAll(kj::String name);
8080

8181
// The Set-Cookie header is special in that it is the only HTTP header that
8282
// is not permitted to be combined into a single instance.
8383
kj::Array<kj::StringPtr> getSetCookie();
8484

85-
bool has(jsg::ByteString name);
85+
bool has(kj::String name);
8686

87-
void set(jsg::Lock& js, jsg::ByteString name, jsg::ByteString value);
88-
void append(jsg::Lock& js, jsg::ByteString name, jsg::ByteString value);
89-
void delete_(jsg::ByteString name);
87+
void set(jsg::Lock& js, kj::String name, kj::String value);
88+
void append(jsg::Lock& js, kj::String name, kj::String value);
89+
void delete_(kj::String name);
9090

9191
void setUnguarded(jsg::Lock& js, kj::String name, kj::String value);
9292
void appendUnguarded(jsg::Lock& js, kj::String name, kj::String value);

src/workerd/api/http.c++

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ Body::Body(jsg::Lock& js, kj::Maybe<ExtractedBody> init, Headers& headers)
215215
if (!headers.hasCommon(capnp::CommonHeaderName::CONTENT_TYPE)) {
216216
// The spec allows the user to override the Content-Type, if they wish, so we only set
217217
// the Content-Type if it doesn't already exist.
218-
headers.setCommon(capnp::CommonHeaderName::CONTENT_TYPE, jsg::ByteString(kj::mv(ct)));
218+
headers.setCommon(capnp::CommonHeaderName::CONTENT_TYPE, kj::mv(ct));
219219
} else if (MimeType::FORM_DATA == ct) {
220220
// Custom content-type request/responses with FormData are broken since they require a
221221
// boundary parameter only the FormData serializer can provide. Let's warn if a dev does this.

src/workerd/api/r2-bucket.c++

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,11 @@ static kj::Date parseDate(jsg::Lock& js, kj::StringPtr value) {
5050
return js.date(value);
5151
}
5252

53-
static jsg::ByteString toUTCString(jsg::Lock& js, kj::Date date) {
53+
static kj::String toUTCString(jsg::Lock& js, kj::Date date) {
5454
return js.date(date).toUTCString(js);
5555
}
5656

57-
static jsg::ByteString toISOString(jsg::Lock& js, kj::Date date) {
57+
static kj::String toISOString(jsg::Lock& js, kj::Date date) {
5858
return js.date(date).toISOString(js);
5959
}
6060

src/workerd/api/trace.c++

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -341,8 +341,7 @@ jsg::Optional<jsg::V8Ref<v8::Object>> TraceItem::FetchEventInfo::Request::getCf(
341341
return detail->cf.map([&](jsg::V8Ref<v8::Object>& obj) { return obj.addRef(js); });
342342
}
343343

344-
jsg::Dict<jsg::ByteString, jsg::ByteString> TraceItem::FetchEventInfo::Request::getHeaders(
345-
jsg::Lock& js) {
344+
jsg::Dict<kj::String, kj::String> TraceItem::FetchEventInfo::Request::getHeaders(jsg::Lock& js) {
346345
auto shouldRedact = [](kj::StringPtr name) {
347346
return (
348347
//(name == "authorization"_kj) || // covered below
@@ -351,12 +350,11 @@ jsg::Dict<jsg::ByteString, jsg::ByteString> TraceItem::FetchEventInfo::Request::
351350
name.contains("token"_kjc));
352351
};
353352

354-
using HeaderDict = jsg::Dict<jsg::ByteString, jsg::ByteString>;
353+
using HeaderDict = jsg::Dict<kj::String, kj::String>;
355354
auto builder = kj::heapArrayBuilder<HeaderDict::Field>(detail->headers.size());
356355
for (const auto& header: detail->headers) {
357356
auto v = (redacted && shouldRedact(header.name)) ? "REDACTED"_kj : header.value;
358-
builder.add(
359-
HeaderDict::Field{jsg::ByteString(kj::str(header.name)), jsg::ByteString(kj::str(v))});
357+
builder.add(HeaderDict::Field{kj::str(header.name), kj::str(v)});
360358
}
361359

362360
// TODO(conform): Better to return a frozen JS Object?

src/workerd/api/trace.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ class TraceItem::FetchEventInfo::Request final: public jsg::Object {
220220
explicit Request(Detail& detail, bool redacted = true);
221221

222222
jsg::Optional<jsg::V8Ref<v8::Object>> getCf(jsg::Lock& js);
223-
jsg::Dict<jsg::ByteString, jsg::ByteString> getHeaders(jsg::Lock& js);
223+
jsg::Dict<kj::String, kj::String> getHeaders(jsg::Lock& js);
224224
kj::StringPtr getMethod();
225225
kj::String getUrl();
226226

src/workerd/jsg/README.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,16 +105,15 @@ At the time of writing this, the primitive value types currently supported by th
105105
| uint32_t | v8::Uint32 | number | |
106106
| uint64_t | v8::BigInt | bigint | |
107107
| kj::String(Ptr) | v8::String | string | |
108-
| jsg::ByteString | v8::String | string | See [ByteString][] spec |
109108
| kj::Date | v8::Date | Date | |
110109
| nullptr | v8::Null | null | See kj::Maybe&lt;T> |
111110
| nullptr | v8::Undefined | undefined | See jsg::Optional&lt;T> |
112111

113112
Specifically, for example, when mapping from JavaScript into C++, when JSG encounters a
114-
string value, it can convert that into either a `kj::String`, or `jsg::ByteString`,
113+
string value, it can convert that into either a `kj::String`, or `jsg::USVString`,
115114
depending on what is needed by the C++ layer. Likewise, when translating from C++ to
116115
JavaScript, JSG will generate a JavaScript `string` whenever it encounters a `kj::String`,
117-
`kj::StringPtr`, or `jsg::ByteString`.
116+
`kj::StringPtr`, or `jsg::USVString`.
118117

119118
JSG will *not* translate JavaScript `string` to `kj::StringPtr`.
120119

@@ -1414,7 +1413,6 @@ TODO(soon): TBD
14141413
14151414
["KJ Style Guide"]: https://github.com/capnproto/capnproto/blob/master/style-guide.md
14161415
["KJ Tour"]: https://github.com/capnproto/capnproto/blob/master/kjdoc/tour.md
1417-
[ByteString]: https://webidl.spec.whatwg.org/#idl-ByteString
14181416
[Record]: https://webidl.spec.whatwg.org/#idl-record
14191417
[Sequence]: https://webidl.spec.whatwg.org/#idl-sequence
14201418

0 commit comments

Comments
 (0)