Skip to content

Commit 18111a9

Browse files
committed
Generalize 3 functions to return length on success
Instead of a bool, they now return the number of bytes that comprise the character being checked. The result can therefore still be used as a bool, just as before, while the extra information can save recalculations, as is done in future commits.
1 parent 0e15d67 commit 18111a9

File tree

3 files changed

+19
-14
lines changed

3 files changed

+19
-14
lines changed

embed.fnc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1753,17 +1753,17 @@ ATdip |bool |is_utf8_fixed_width_buf_loclen_flags \
17531753
|NULLOK const U8 **ep \
17541754
|NULLOK STRLEN *el \
17551755
|const U32 flags
1756-
CRp |bool |is_utf8_FOO_ |const U8 classnum \
1756+
CRp |Size_t |is_utf8_FOO_ |const U8 classnum \
17571757
|NN const U8 *p \
17581758
|NN const U8 * const e
17591759
ARTdip |bool |is_utf8_invariant_string_loc \
17601760
|NN const U8 * const s \
17611761
|STRLEN len \
17621762
|NULLOK const U8 **ep
1763-
CRp |bool |is_utf8_perl_idcont_ \
1763+
CRp |Size_t |is_utf8_perl_idcont_ \
17641764
|NN const U8 *p \
17651765
|NN const U8 * const e
1766-
CRp |bool |is_utf8_perl_idstart_ \
1766+
CRp |Size_t |is_utf8_perl_idstart_ \
17671767
|NN const U8 *p \
17681768
|NN const U8 * const e
17691769
ARTdmp |bool |is_utf8_string |NN const U8 *s \

proto.h

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

utf8.c

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3900,32 +3900,37 @@ S_warn_on_first_deprecated_use(pTHX_ U32 category,
39003900
}
39013901
#endif
39023902

3903-
/* returns a boolean giving whether or not the UTF8-encoded character that
3904-
* starts at <p>, and extending no further than <e - 1> is in the inversion
3905-
* list <invlist>. */
3906-
STATIC bool
3903+
/* returns the number of bytes comprising the UTF8-encoded character that
3904+
* starts at <p>, and extending no further than <e - 1> if it is in the
3905+
* inversion list <invlist>; or 0 if it isn't */
3906+
STATIC Size_t
39073907
S_is_utf8_in_invlist(pTHX_ const U8 * p, const U8 * e, SV * const invlist)
39083908
{
3909-
return _invlist_contains_cp(invlist, utf8_to_uv_or_die(p, e, NULL));
3909+
Size_t advance;
3910+
if (_invlist_contains_cp(invlist, utf8_to_uv_or_die(p, e, &advance))) {
3911+
return advance;
3912+
}
3913+
3914+
return 0;
39103915
}
39113916

3912-
bool
3917+
Size_t
39133918
Perl_is_utf8_FOO_(pTHX_ const U8 classnum, const U8 *p, const U8 * const e)
39143919
{
39153920
PERL_ARGS_ASSERT_IS_UTF8_FOO_;
39163921

39173922
return S_is_utf8_in_invlist(aTHX_ p, e, PL_XPosix_ptrs[classnum]);
39183923
}
39193924

3920-
bool
3925+
Size_t
39213926
Perl_is_utf8_perl_idstart_(pTHX_ const U8 *p, const U8 * const e)
39223927
{
39233928
PERL_ARGS_ASSERT_IS_UTF8_PERL_IDSTART_;
39243929

39253930
return S_is_utf8_in_invlist(aTHX_ p, e, PL_utf8_perl_idstart);
39263931
}
39273932

3928-
bool
3933+
Size_t
39293934
Perl_is_utf8_perl_idcont_(pTHX_ const U8 *p, const U8 * const e)
39303935
{
39313936
PERL_ARGS_ASSERT_IS_UTF8_PERL_IDCONT_;

0 commit comments

Comments
 (0)