Skip to content

Commit eaced01

Browse files
anonrig and erikcorry committed
Apply suggestions from code review
Co-authored-by: Erik Corry <[email protected]>
1 parent 66bc586 commit eaced01

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

src/encoding_binding.cc

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,12 +117,15 @@ size_t findBestFit(const Char* data, size_t length, size_t bufferSize) {
117117
std::max(size_t{1}, std::max(guaranteedToFit, likelyToFit));
118118
size_t chunkSize = std::min(remainingInput, fitEstimate);
119119
if (chunkSize == 1) break;
120-
DCHECK_GE(chunkSize, 1);
120+
DCHECK_GT(chunkSize, 1);
121121

122122
size_t chunkUtf8Len;
123123
if constexpr (UTF16) {
124124
// TODO(anonrig): Use utf8_length_from_utf16_with_replacement when
125125
// available. For now, validate and use utf8_length_from_utf16.
126+
size_t newPos = pos + chunkSize;
127+
if (newPos < length && isSurrogatePair(data[newPos - 1], data[newPos]))
128+
chunkSize--;
126129
chunkUtf8Len = simdutf::utf8_length_from_utf16(data + pos, chunkSize);
127130
} else {
128131
chunkUtf8Len = simdutf::utf8_length_from_latin1(data + pos, chunkSize);
@@ -223,7 +226,8 @@ void BindingData::EncodeInto(const FunctionCallbackInfo<Value>& args) {
223226
auto data = reinterpret_cast<const char16_t*>(view.data16());
224227

225228
// Limit conversion to what could fit in destination, avoiding splitting
226-
// a valid surrogate pair at the boundary
229+
// a valid surrogate pair at the boundary, which could cause a spurious call
230+
// of simdutf::to_well_formed_utf16()
227231
if (length_that_fits > 0 && length_that_fits < view.length() &&
228232
isSurrogatePair(data[length_that_fits - 1], data[length_that_fits])) {
229233
length_that_fits--;
@@ -236,7 +240,7 @@ void BindingData::EncodeInto(const FunctionCallbackInfo<Value>& args) {
236240

237241
if (validation_result.error == simdutf::SUCCESS) {
238242
// Valid UTF-16 - use the fast path
239-
read = findBestFit(data, view.length(), dest_length);
243+
read = findBestFit(data, length_that_fits, dest_length);
240244
if (read != 0) {
241245
DCHECK_LE(simdutf::utf8_length_from_utf16(data, read), dest_length);
242246
written = simdutf::convert_utf16_to_utf8(data, read, write_result);
@@ -262,7 +266,7 @@ void BindingData::EncodeInto(const FunctionCallbackInfo<Value>& args) {
262266
}
263267
}
264268
}
265-
DCHECK_LE(written, dest_length);
269+
DCHECK_LE(written, dest->ByteLength(););
266270

267271
binding_data->encode_into_results_buffer_[0] = static_cast<double>(read);
268272
binding_data->encode_into_results_buffer_[1] = static_cast<double>(written);

0 commit comments

Comments
 (0)