Skip to content

Commit 8d7afe4

Browse files
committed
src: add EncodeValidatedUTF8 to avoid double validation
Signed-off-by: Guilherme Araújo <arauujogui@gmail.com>
1 parent 37779fd commit 8d7afe4

3 files changed

Lines changed: 46 additions & 51 deletions

File tree

src/encoding_binding.cc

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -471,14 +471,21 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
471471
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
472472
env->isolate(), "The encoded data was not valid for encoding utf-8");
473473
}
474+
475+
Local<Value> ret;
476+
// Data is already validated as UTF-8 above; skip redundant re-validation.
477+
if (StringBytes::EncodeValidatedUTF8(env->isolate(), data, length)
478+
.ToLocal(&ret)) {
479+
args.GetReturnValue().Set(ret);
480+
}
481+
return;
474482
}
475483

476484
if (length == 0) return args.GetReturnValue().SetEmptyString();
477485

478486
Local<Value> ret;
479487
v8::MaybeLocal<Value> encoded =
480-
has_fatal ? StringBytes::EncodeValidUtf8(env->isolate(), data, length)
481-
: StringBytes::Encode(env->isolate(), data, length, UTF8);
488+
StringBytes::Encode(env->isolate(), data, length, UTF8);
482489
if (encoded.ToLocal(&ret)) {
483490
args.GetReturnValue().Set(ret);
484491
}

src/string_bytes.cc

Lines changed: 32 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -537,6 +537,37 @@ Maybe<size_t> StringBytes::Size(Isolate* isolate,
537537
} \
538538
} while (0)
539539

540+
// Converts known-valid, known-non-ASCII UTF-8 (buflen >= 32) to a V8 string
541+
// via the fast UTF-16 path. Callers must ensure buflen is range-checked.
542+
static MaybeLocal<Value> EncodeKnownValidNonAsciiUTF8(Isolate* isolate,
543+
const char* buf,
544+
size_t buflen) {
545+
size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen);
546+
if (u16size > static_cast<size_t>(v8::String::kMaxLength)) {
547+
isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
548+
return MaybeLocal<Value>();
549+
}
550+
return EncodeTwoByteString(
551+
isolate, u16size, [buf, buflen, u16size](uint16_t* dst) {
552+
size_t written = simdutf::convert_valid_utf8_to_utf16(
553+
buf, buflen, reinterpret_cast<char16_t*>(dst));
554+
CHECK_EQ(written, u16size);
555+
});
556+
}
557+
558+
MaybeLocal<Value> StringBytes::EncodeValidatedUTF8(Isolate* isolate,
559+
const char* buf,
560+
size_t buflen) {
561+
buflen = keep_buflen_in_range(buflen);
562+
if (buflen >= 32)
563+
return EncodeKnownValidNonAsciiUTF8(isolate, buf, buflen);
564+
Local<String> str;
565+
if (!String::NewFromUtf8(isolate, buf, v8::NewStringType::kNormal, buflen)
566+
.ToLocal(&str))
567+
isolate->ThrowException(node::ERR_STRING_TOO_LONG(isolate));
568+
return str;
569+
}
570+
540571
MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
541572
const char* buf,
542573
size_t buflen,
@@ -650,17 +681,7 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
650681
static_cast<int>(r.count));
651682
}
652683
} else if (simdutf::validate_utf8(buf, buflen)) {
653-
size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen);
654-
if (u16size > static_cast<size_t>(v8::String::kMaxLength)) {
655-
isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
656-
return MaybeLocal<Value>();
657-
}
658-
return EncodeTwoByteString(
659-
isolate, u16size, [buf, buflen, u16size](uint16_t* dst) {
660-
size_t written = simdutf::convert_valid_utf8_to_utf16(
661-
buf, buflen, reinterpret_cast<char16_t*>(dst));
662-
CHECK_EQ(written, u16size);
663-
});
684+
return EncodeKnownValidNonAsciiUTF8(isolate, buf, buflen);
664685
}
665686
}
666687

@@ -736,40 +757,6 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
736757
}
737758
}
738759

739-
MaybeLocal<Value> StringBytes::EncodeValidUtf8(Isolate* isolate,
740-
const char* buf,
741-
size_t buflen) {
742-
CHECK_BUFLEN_IN_RANGE(buflen);
743-
if (!buflen) return String::Empty(isolate);
744-
buflen = keep_buflen_in_range(buflen);
745-
746-
// ASCII fast path
747-
if (!simdutf::validate_ascii_with_errors(buf, buflen).error) {
748-
return ExternOneByteString::NewFromCopy(isolate, buf, buflen);
749-
}
750-
751-
if (buflen >= 32) {
752-
size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen);
753-
if (u16size > static_cast<size_t>(v8::String::kMaxLength)) {
754-
isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
755-
return MaybeLocal<Value>();
756-
}
757-
return EncodeTwoByteString(
758-
isolate, u16size, [buf, buflen, u16size](uint16_t* dst) {
759-
size_t written = simdutf::convert_valid_utf8_to_utf16(
760-
buf, buflen, reinterpret_cast<char16_t*>(dst));
761-
CHECK_EQ(written, u16size);
762-
});
763-
}
764-
765-
Local<String> str;
766-
if (!String::NewFromUtf8(isolate, buf, v8::NewStringType::kNormal, buflen)
767-
.ToLocal(&str)) {
768-
isolate->ThrowException(node::ERR_STRING_TOO_LONG(isolate));
769-
}
770-
return str;
771-
}
772-
773760
MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
774761
const uint16_t* buf,
775762
size_t buflen) {

src/string_bytes.h

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -83,10 +83,11 @@ class StringBytes {
8383
size_t buflen,
8484
enum encoding encoding);
8585

86-
// Like Encode(..., UTF8) but does not re-validate. Input must be valid UTF-8.
87-
static v8::MaybeLocal<v8::Value> EncodeValidUtf8(v8::Isolate* isolate,
88-
const char* buf,
89-
size_t buflen);
86+
// Like Encode(..., UTF8) but skips UTF-8 validation. Caller must guarantee
87+
// that buf contains valid UTF-8.
88+
static v8::MaybeLocal<v8::Value> EncodeValidatedUTF8(v8::Isolate* isolate,
89+
const char* buf,
90+
size_t buflen);
9091

9192
// Warning: This reverses endianness on BE platforms, even though the
9293
// signature using uint16_t implies that it should not.

0 commit comments

Comments
 (0)