Skip to content

Commit 4f82c5a

Browse files
committed
src: improve StringBytes::Encode perf on ASCII
1 parent 2e597de commit 4f82c5a

File tree

2 files changed: 28 additions and 7 deletions.

src/encoding_binding.cc

Lines changed: 20 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -356,7 +356,27 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
356 356
const char* data = buffer.data();
357 357
size_t length = buffer.length();
358 358

359+
if (!ignore_bom && length >= 3) {
360+
if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
361+
data += 3;
362+
length -= 3;
363+
}
364+
}
365+
359 366
if (has_fatal) {
367+
// Are we perhaps ASCII? Then we won't have to check for UTF-8
368+
if (simdutf::validate_ascii(reinterpret_cast<const char*>(data), length)) {
369+
Local<Value> ret;
370+
if (StringBytes::Encode(env->isolate(),
371+
reinterpret_cast<const char*>(data),
372+
length,
373+
LATIN1)
374+
.ToLocal(&ret)) {
375+
args.GetReturnValue().Set(ret);
376+
}
377+
return;
378+
}
379+
360 380
auto result = simdutf::validate_utf8_with_errors(data, length);
361 381

362 382
if (result.error) {
@@ -365,13 +385,6 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
365 385
}
366 386
}
367 387

368-
if (!ignore_bom && length >= 3) {
369-
if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
370-
data += 3;
371-
length -= 3;
372-
}
373-
}
374-
375 388
if (length == 0) return args.GetReturnValue().SetEmptyString();
376 389

377 390
Local<Value> ret;

src/string_bytes.cc

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -531,6 +531,14 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
531 531

532 532
case UTF8: {
533 533
buflen = keep_buflen_in_range(buflen);
534+
535+
// ASCII fast path
536+
// TODO(chalker): remove when String::NewFromUtf8 is fast enough itself
537+
// This is cheap compared to the benefits though
538+
if (simdutf::validate_ascii(buf, buflen)) {
539+
return ExternOneByteString::NewFromCopy(isolate, buf, buflen);
540+
}
541+
534 542
val =
535 543
String::NewFromUtf8(isolate, buf, v8::NewStringType::kNormal, buflen);
536 544
Local<String> str;

0 commit comments

Comments
 (0)