diff --git a/embed.fnc b/embed.fnc index efe62e6502b7..c35710d56423 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3906,7 +3906,10 @@ Adp |bool |valid_identifier_pvn \ |U32 flags Adp |bool |valid_identifier_sv \ |NULLOK SV *sv -CRTdip |UV |valid_utf8_to_uvchr \ +CRTdip |UV |valid_utf8_to_uv \ + |NN const U8 *s \ + |NULLOK STRLEN *retlen +CRTdmp |UV |valid_utf8_to_uvchr \ |NN const U8 *s \ |NULLOK STRLEN *retlen Adp |int |vcmp |NN SV *lhv \ diff --git a/embed.h b/embed.h index fa647eadca0a..c08064dbd45d 100644 --- a/embed.h +++ b/embed.h @@ -841,7 +841,8 @@ # define valid_identifier_pve(a,b,c) Perl_valid_identifier_pve(aTHX_ a,b,c) # define valid_identifier_pvn(a,b,c) Perl_valid_identifier_pvn(aTHX_ a,b,c) # define valid_identifier_sv(a) Perl_valid_identifier_sv(aTHX_ a) -# define valid_utf8_to_uvchr Perl_valid_utf8_to_uvchr +# define valid_utf8_to_uv Perl_valid_utf8_to_uv +# define Perl_valid_utf8_to_uvchr valid_utf8_to_uvchr # define vcmp(a,b) Perl_vcmp(aTHX_ a,b) # define vcroak(a,b) Perl_vcroak(aTHX_ a,b) # define vdeb(a,b) Perl_vdeb(aTHX_ a,b) diff --git a/inline.h b/inline.h index e3ff5cdd1372..659e0dba88ef 100644 --- a/inline.h +++ b/inline.h @@ -1306,25 +1306,36 @@ Perl_utf8_to_bytes_overwrite(pTHX_ U8 **s_ptr, STRLEN *lenp) } /* -=for apidoc valid_utf8_to_uvchr -Like C>, but should only be called when it is +=for apidoc valid_utf8_to_uv +=for apidoc_item valid_utf8_to_uvchr + +These are synonymous. + +These are like C<L</utf8_to_uv>>, but should only be called when it is known that the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>, it passes C<L<perlapi/isUTF8_CHAR>>). Surrogates, non-character code points, and non-Unicode code points are allowed. +The only use for these is that they should run slightly faster than +C<utf8_to_uv> because no error checking is done. + +The C<_uv> form is slightly preferred so as to have a consistent spelling with +the other C<_uv> forms that are definitely preferred over the older and +problematic C<_uvchr> forms. 
+ =cut */ PERL_STATIC_INLINE UV -Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen) +Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen) { + PERL_ARGS_ASSERT_VALID_UTF8_TO_UV; + const UV expectlen = UTF8SKIP(s); const U8* send = s + expectlen; UV uv = *s; - PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR; - if (retlen) { *retlen = expectlen; } diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 8917ce5a4296..963911679527 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -460,6 +460,13 @@ C statements yourself to F that will be incorporated into the generated macro, beyond the system-generated ones. Comments and examples in F give details. +=item * + +A new function C<valid_utf8_to_uv> has been added. This is synonymous +with C<valid_utf8_to_uvchr>; its reason for existence is to have +consistent spelling with the names of the other functions that translate +from UTF-8, so you don't have to remember a different spelling. + =back =head1 Selected Bug Fixes diff --git a/proto.h b/proto.h index b74a9c7ef985..61e423ee8dc6 100644 --- a/proto.h +++ b/proto.h @@ -5428,6 +5428,10 @@ PERL_CALLCONV bool Perl_valid_identifier_sv(pTHX_ SV *sv); #define PERL_ARGS_ASSERT_VALID_IDENTIFIER_SV +/* PERL_CALLCONV UV +Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen) + __attribute__warn_unused_result__; */ + #define PERL_ARGS_ASSERT_VALIDATE_PROTO \ assert(name) @@ -10298,9 +10302,9 @@ Perl_uv_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags); assert(d) PERL_STATIC_INLINE UV -Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen) +Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen) __attribute__warn_unused_result__; -# define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR \ +# define PERL_ARGS_ASSERT_VALID_UTF8_TO_UV \ assert(s) PERL_STATIC_INLINE void diff --git a/utf8.h b/utf8.h index 162d8c054e4d..337d8c50ccdb 100644 --- a/utf8.h +++ b/utf8.h @@ -191,6 +191,7 @@ For details, see the description for L. 
#define c9strict_utf8_to_uv(s, e, cp_p, advance_p) \ utf8_to_uv_flags( s, e, cp_p, advance_p, \ UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE) +#define valid_utf8_to_uvchr(s, advance_p) valid_utf8_to_uv(s, advance_p) #define utf16_to_utf8(p, d, bytelen, newlen) \ utf16_to_utf8_base(p, d, bytelen, newlen, 0, 1)