@@ -117,12 +117,15 @@ size_t findBestFit(const Char* data, size_t length, size_t bufferSize) {
117117 std::max (size_t {1 }, std::max (guaranteedToFit, likelyToFit));
118118 size_t chunkSize = std::min (remainingInput, fitEstimate);
119119 if (chunkSize == 1 ) break ;
120- DCHECK_GE (chunkSize, 1 );
120+ DCHECK_GT (chunkSize, 1 );
121121
122122 size_t chunkUtf8Len;
123123 if constexpr (UTF16) {
124124 // TODO(anonrig): Use utf8_length_from_utf16_with_replacement when
125125 // available. For now, validate and use utf8_length_from_utf16.
126+ size_t newPos = pos + chunkSize;
127+ if (newPos < length && isSurrogatePair (data[newPos - 1 ], data[newPos]))
128+ chunkSize--;
126129 chunkUtf8Len = simdutf::utf8_length_from_utf16 (data + pos, chunkSize);
127130 } else {
128131 chunkUtf8Len = simdutf::utf8_length_from_latin1 (data + pos, chunkSize);
@@ -223,7 +226,8 @@ void BindingData::EncodeInto(const FunctionCallbackInfo<Value>& args) {
223226 auto data = reinterpret_cast <const char16_t *>(view.data16 ());
224227
225228 // Limit conversion to what could fit in destination, avoiding splitting
226- // a valid surrogate pair at the boundary
229+ // a valid surrogate pair at the boundary, which could cause a spurious call
230+ // of simdutf::to_well_formed_utf16()
227231 if (length_that_fits > 0 && length_that_fits < view.length () &&
228232 isSurrogatePair (data[length_that_fits - 1 ], data[length_that_fits])) {
229233 length_that_fits--;
@@ -236,7 +240,7 @@ void BindingData::EncodeInto(const FunctionCallbackInfo<Value>& args) {
236240
237241 if (validation_result.error == simdutf::SUCCESS) {
238242 // Valid UTF-16 - use the fast path
239- read = findBestFit (data, view. length () , dest_length);
243+ read = findBestFit (data, length_that_fits , dest_length);
240244 if (read != 0 ) {
241245 DCHECK_LE (simdutf::utf8_length_from_utf16 (data, read), dest_length);
242246 written = simdutf::convert_utf16_to_utf8 (data, read, write_result);
@@ -262,7 +266,7 @@ void BindingData::EncodeInto(const FunctionCallbackInfo<Value>& args) {
262266 }
263267 }
264268 }
265- DCHECK_LE (written, dest_length );
269+ DCHECK_LE (written, dest-> ByteLength (); );
266270
267271 binding_data->encode_into_results_buffer_ [0 ] = static_cast <double >(read);
268272 binding_data->encode_into_results_buffer_ [1 ] = static_cast <double >(written);
0 commit comments