Skip to content

Commit 7e81f19

Browse files
committed
Define MAX_UNICODE_UTF8_BYTES
This value is the maximum number of bytes required to represent, in UTF-8, any code point in the legal Unicode range 0..0x10FFFF.
1 parent 62e1505 commit 7e81f19

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

utf8.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,11 @@ regen/charset_translations.pl. */
574574
+ (pos) + ((UTF_CONTINUATION_BYTE_INFO_BITS - 1) - 1)) /* Step fcn */ \
575575
/ (UTF_CONTINUATION_BYTE_INFO_BITS - 1)) /* take floor of */
576576

577+
578+
/* The maximum number of bytes required to represent any Unicode code point
579+
* 0..0x10FFFF */
580+
#define MAX_UNICODE_UTF8_BYTES UNISKIP_BY_MSB_(20)
581+
577582
/* Compute the number of UTF-8 bytes required for representing the input uv,
578583
* which must be a Unicode, not native value.
579584
*
@@ -728,7 +733,7 @@ uppercase/lowercase/titlecase/fold into.
728733
=cut
729734
*/
730735
#define UTF8_MAXBYTES_CASE \
731-
MAX(UTF8_MAXBYTES, UTF8_MAX_FOLD_CHAR_EXPAND * UNISKIP_BY_MSB_(20))
736+
MAX(UTF8_MAXBYTES, UTF8_MAX_FOLD_CHAR_EXPAND * MAX_UNICODE_UTF8_BYTES)
732737

733738
/* Rest of these are attributes of Unicode and perl's internals rather than the
734739
* encoding, or happen to be the same in both ASCII and EBCDIC (at least at
@@ -1090,7 +1095,7 @@ this macro matches
10901095
|| NATIVE_UTF8_TO_I8(s[1]) >= UTF_FIRST_CONT_BYTE_110000_))
10911096

10921097
#define UTF8_IS_SUPER(s, e) \
1093-
((((e) - (s)) >= UNISKIP_BY_MSB_(20) && UTF8_IS_SUPER_NO_CHECK_(s)) \
1098+
((((e) - (s)) >= MAX_UNICODE_UTF8_BYTES && UTF8_IS_SUPER_NO_CHECK_(s)) \
10941099
? isUTF8_CHAR(s, e) \
10951100
: 0)
10961101

0 commit comments

Comments (0)