Skip to content

Commit 137c0f0

Browse files
committed
Implement utf8_to_uvchr_buf in terms of utf8_to_uv_flags
This is simpler than the existing one.
1 parent 95f8a0b commit 137c0f0

File tree

1 file changed

+21
-12
lines changed

1 file changed

+21
-12
lines changed

inline.h

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3169,22 +3169,31 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
31693169
PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF;
31703170
assert(s < send);
31713171

3172-
if (! ckWARN_d(WARN_UTF8)) {
3172+
UV cp;
31733173

3174-
/* EMPTY is not really allowed, and asserts on debugging builds. But
3175-
* on non-debugging we have to deal with it, and this causes it to
3176-
* return the REPLACEMENT CHARACTER, as the documentation indicates */
3177-
return utf8n_to_uvchr(s, send - s, retlen,
3178-
(UTF8_ALLOW_ANY | UTF8_ALLOW_EMPTY));
3174+
/* When everything is legal, just return that; but when not:
3175+
* 1) if warnings are enabled, return 0 and set retlen to -1
3176+
* 2) if warnings are disabled, set 'flags' to accept any malformation,
3177+
* but that will just cause the REPLACEMENT CHARACTER to be returned,
3178+
* as the documentation indicates. EMPTY is not really allowed, and
3179+
* asserts on debugging builds. But on non-debugging we have to deal
3180+
* with it.
3181+
* This API means a return of 0 can be either a legal NUL or malformed input; and
3182+
* the caller has to know if warnings are disabled to know if it can rely on
3183+
* 'retlen'. Best to use utf8_to_uv() instead */
3184+
U32 flags = (ckWARN_d(WARN_UTF8)) ? 0 : (UTF8_ALLOW_ANY | UTF8_ALLOW_EMPTY);
3185+
3186+
if ( LIKELY(utf8_to_uv_flags(s, send, &cp, retlen, flags))
3187+
|| flags)
3188+
{
3189+
return cp;
31793190
}
3180-
else {
3181-
UV ret = utf8n_to_uvchr(s, send - s, retlen, 0);
3182-
if (retlen && ret == 0 && (send <= s || *s != '\0')) {
3183-
*retlen = (STRLEN) -1;
3184-
}
31853191

3186-
return ret;
3192+
if (retlen) {
3193+
*retlen = (STRLEN) -1;
31873194
}
3195+
3196+
return 0;
31883197
}
31893198

31903199
/* ------------------------------- perl.h ----------------------------- */

0 commit comments

Comments
 (0)