Skip to content

Commit 66bc586

Browse files
committed
fixup! util: improve textencoder encodeInto performance
1 parent f993411 commit 66bc586

File tree

1 file changed

+26
-26
lines changed

1 file changed

+26
-26
lines changed

src/encoding_binding.cc

Lines changed: 26 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -198,22 +198,21 @@ void BindingData::EncodeInto(const FunctionCallbackInfo<Value>& args) {
198198
size_t read = 0;
199199
size_t written = 0;
200200
v8::String::ValueView view(isolate, source);
201-
size_t length = view.length();
201+
size_t length_that_fits =
202+
std::min(static_cast<size_t>(view.length()), dest_length);
202203

203204
if (view.is_one_byte()) {
204205
auto data = reinterpret_cast<const char*>(view.data8());
205-
simdutf::result result = simdutf::validate_ascii_with_errors(data, length);
206-
// Only copy what fits in the destination
207-
written = read = std::min(result.count, dest_length);
208-
if (read > 0) {
209-
memcpy(write_result, data, read);
210-
write_result += read;
211-
data += read;
212-
length -= read;
213-
dest_length -= read;
214-
}
215-
if (length != 0 && dest_length != 0) {
216-
size_t rest = findBestFit(data, length, dest_length);
206+
simdutf::result result =
207+
simdutf::validate_ascii_with_errors(data, length_that_fits);
208+
written = read = result.count;
209+
memcpy(write_result, data, read);
210+
write_result += read;
211+
data += read;
212+
length_that_fits -= read;
213+
dest_length -= read;
214+
if (length_that_fits != 0 && dest_length != 0) {
215+
size_t rest = findBestFit(data, length_that_fits, dest_length);
217216
if (rest != 0) {
218217
DCHECK_LE(simdutf::utf8_length_from_latin1(data, rest), dest_length);
219218
written += simdutf::convert_latin1_to_utf8(data, rest, write_result);
@@ -223,14 +222,21 @@ void BindingData::EncodeInto(const FunctionCallbackInfo<Value>& args) {
223222
} else {
224223
auto data = reinterpret_cast<const char16_t*>(view.data16());
225224

225+
// Limit conversion to what could fit in destination, avoiding splitting
226+
// a valid surrogate pair at the boundary
227+
if (length_that_fits > 0 && length_that_fits < view.length() &&
228+
isSurrogatePair(data[length_that_fits - 1], data[length_that_fits])) {
229+
length_that_fits--;
230+
}
231+
226232
// Check if input has unpaired surrogates - if so, convert to well-formed
227233
// first
228234
simdutf::result validation_result =
229-
simdutf::validate_utf16_with_errors(data, length);
235+
simdutf::validate_utf16_with_errors(data, length_that_fits);
230236

231237
if (validation_result.error == simdutf::SUCCESS) {
232238
// Valid UTF-16 - use the fast path
233-
read = findBestFit(data, length, dest_length);
239+
read = findBestFit(data, view.length(), dest_length);
234240
if (read != 0) {
235241
DCHECK_LE(simdutf::utf8_length_from_utf16(data, read), dest_length);
236242
written = simdutf::convert_utf16_to_utf8(data, read, write_result);
@@ -239,20 +245,14 @@ void BindingData::EncodeInto(const FunctionCallbackInfo<Value>& args) {
239245
// Invalid UTF-16 with unpaired surrogates - convert to well-formed first
240246
// TODO(anonrig): Use utf8_length_from_utf16_with_replacement when
241247
// available
242-
// Limit conversion to what could fit in destination, avoiding splitting
243-
// a valid surrogate pair at the boundary
244-
size_t safe_length = std::min(length, dest_length);
245-
if (safe_length > 0 && safe_length < view.length() &&
246-
isSurrogatePair(data[safe_length - 1], data[safe_length])) {
247-
safe_length--;
248-
}
249-
250248
MaybeStackBuffer<char16_t, MAX_SIZE_FOR_STACK_ALLOC> conversion_buffer(
251-
safe_length);
252-
simdutf::to_well_formed_utf16(data, safe_length, conversion_buffer.out());
249+
length_that_fits);
250+
simdutf::to_well_formed_utf16(
251+
data, length_that_fits, conversion_buffer.out());
253252

254253
// Now use findBestFit with the well-formed data
255-
read = findBestFit(conversion_buffer.out(), safe_length, dest_length);
254+
read =
255+
findBestFit(conversion_buffer.out(), length_that_fits, dest_length);
256256
if (read != 0) {
257257
DCHECK_LE(
258258
simdutf::utf8_length_from_utf16(conversion_buffer.out(), read),

0 commit comments

Comments
 (0)