Skip to content

Commit 880bac9

Browse files
committed
utf8.h: Split a macro into components
This creates an internal macro that performs only the byte comparisons, skipping the available-length check and the isUTF8_CHAR() validation, for use when we don't care whether the sequence is completely well-formed.
1 parent cea23d3 commit 880bac9

File tree

1 file changed

+10
-5
lines changed

1 file changed

+10
-5
lines changed

utf8.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,13 +1065,18 @@ this macro matches
10651065
#define UTF_START_BYTE_110000_ UTF_START_BYTE(PERL_UNICODE_MAX + 1, 21)
10661066
#define UTF_FIRST_CONT_BYTE_110000_ \
10671067
UTF_FIRST_CONT_BYTE(PERL_UNICODE_MAX + 1, 21)
1068+
1069+
/* Internal macro for use when we don't care whether the sequence is well-formed, and know we
1070+
* have two bytes available to read */
1071+
#define UTF8_IS_SUPER_NO_CHECK_(s) \
1072+
( NATIVE_UTF8_TO_I8(s[0]) >= UTF_START_BYTE_110000_ \
1073+
&& ( NATIVE_UTF8_TO_I8(s[0]) > UTF_START_BYTE_110000_ \
1074+
|| NATIVE_UTF8_TO_I8(s[1]) >= UTF_FIRST_CONT_BYTE_110000_))
1075+
10681076
#define UTF8_IS_SUPER(s, e) \
1069-
( ((e) - (s)) >= UNISKIP_BY_MSB_(20) \
1070-
&& ( NATIVE_UTF8_TO_I8(s[0]) >= UTF_START_BYTE_110000_ \
1071-
&& ( NATIVE_UTF8_TO_I8(s[0]) > UTF_START_BYTE_110000_ \
1072-
|| NATIVE_UTF8_TO_I8(s[1]) >= UTF_FIRST_CONT_BYTE_110000_))) \
1077+
((((e) - (s)) >= UNISKIP_BY_MSB_(20) && UTF8_IS_SUPER_NO_CHECK_(s)) \
10731078
? isUTF8_CHAR(s, e) \
1074-
: 0
1079+
: 0)
10751080

10761081
/*
10771082
=for apidoc Am|bool|UNICODE_IS_NONCHAR|const UV uv

0 commit comments

Comments
 (0)