@@ -3169,22 +3169,31 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
3169
3169
PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF ;
3170
3170
assert (s < send );
3171
3171
3172
- if (! ckWARN_d ( WARN_UTF8 )) {
3172
+ UV cp ;
3173
3173
3174
- /* EMPTY is not really allowed, and asserts on debugging builds. But
3175
- * on non-debugging we have to deal with it, and this causes it to
3176
- * return the REPLACEMENT CHARACTER, as the documentation indicates */
3177
- return utf8n_to_uvchr (s , send - s , retlen ,
3178
- (UTF8_ALLOW_ANY | UTF8_ALLOW_EMPTY ));
3174
+ /* When everything is legal, just return that; but when not:
3175
+ * 1) if warnings are enabled, return 0 and set *retlen to -1
3176
+ * 2) if warnings are disabled, set 'flags' to accept any malformation,
3177
+ * but that will just cause the REPLACEMENT CHARACTER to be returned,
3178
+ * as the documentation indicates. EMPTY is not really allowed, and
3179
+ * asserts on debugging builds. But on non-debugging we have to deal
3180
+ * with it.
3181
+ * This API means 0 can mean a legal NUL, or the input is malformed; and
3182
+ * the caller has to know if warnings are disabled to know if it can rely on
3183
+ * 'retlen'. Best to use utf8_to_uv() instead */
3184
+ U32 flags = (ckWARN_d (WARN_UTF8 )) ? 0 : (UTF8_ALLOW_ANY | UTF8_ALLOW_EMPTY );
3185
+
3186
+ if ( LIKELY (utf8_to_uv_flags (s , send , & cp , retlen , flags ))
3187
+ || flags )
3188
+ {
3189
+ return cp ;
3179
3190
}
3180
- else {
3181
- UV ret = utf8n_to_uvchr (s , send - s , retlen , 0 );
3182
- if (retlen && ret == 0 && (send <= s || * s != '\0' )) {
3183
- * retlen = (STRLEN ) - 1 ;
3184
- }
3185
3191
3186
- return ret ;
3192
+ if (retlen ) {
3193
+ * retlen = (STRLEN ) - 1 ;
3187
3194
}
3195
+
3196
+ return 0 ;
3188
3197
}
3189
3198
3190
3199
/* ------------------------------- perl.h ----------------------------- */
0 commit comments