@@ -117,12 +117,14 @@ size_t findBestFit(const Char* data, size_t length, size_t bufferSize) {
117117 std::max (size_t {1 }, std::max (guaranteedToFit, likelyToFit));
118118 size_t chunkSize = std::min (remainingInput, fitEstimate);
119119 if (chunkSize == 1 ) break ;
120- DCHECK_GE (chunkSize, 1 );
120+ DCHECK_GT (chunkSize, 1 );
121121
122122 size_t chunkUtf8Len;
123123 if constexpr (UTF16) {
124124 // TODO(anonrig): Use utf8_length_from_utf16_with_replacement when
125125 // available. For now, validate and use utf8_length_from_utf16.
126+ size_t newPos = pos + chunkSize;
127+ if (newPos < length && isSurrogatePair (data[newPos - 1 ], data[newPos])) chunkSize--;
126128 chunkUtf8Len = simdutf::utf8_length_from_utf16 (data + pos, chunkSize);
127129 } else {
128130 chunkUtf8Len = simdutf::utf8_length_from_latin1 (data + pos, chunkSize);
@@ -223,7 +225,8 @@ void BindingData::EncodeInto(const FunctionCallbackInfo<Value>& args) {
223225 auto data = reinterpret_cast <const char16_t *>(view.data16 ());
224226
225227 // Limit conversion to what could fit in destination, avoiding splitting
226- // a valid surrogate pair at the boundary
228+ // a valid surrogate pair at the boundary, which could cause a spurious call
229+ // of simdutf::to_well_formed_utf16()
227230 if (length_that_fits > 0 && length_that_fits < view.length () &&
228231 isSurrogatePair (data[length_that_fits - 1 ], data[length_that_fits])) {
229232 length_that_fits--;
@@ -236,7 +239,7 @@ void BindingData::EncodeInto(const FunctionCallbackInfo<Value>& args) {
236239
237240 if (validation_result.error == simdutf::SUCCESS) {
238241 // Valid UTF-16 - use the fast path
239- read = findBestFit (data, view. length () , dest_length);
242+ read = findBestFit (data, length_that_fits , dest_length);
240243 if (read != 0 ) {
241244 DCHECK_LE (simdutf::utf8_length_from_utf16 (data, read), dest_length);
242245 written = simdutf::convert_utf16_to_utf8 (data, read, write_result);
@@ -262,7 +265,7 @@ void BindingData::EncodeInto(const FunctionCallbackInfo<Value>& args) {
262265 }
263266 }
264267 }
265- DCHECK_LE (written, dest_length );
268+ DCHECK_LE (written, dest-> ByteLength (); );
266269
267270 binding_data->encode_into_results_buffer_ [0 ] = static_cast <double >(read);
268271 binding_data->encode_into_results_buffer_ [1 ] = static_cast <double >(written);
0 commit comments