Skip to content

Commit 686c251

Browse files
committed
utf8.h: Define flags in terms of bit positions
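This commit creates new #defines giving the bit position of each flag that gets passed to the various functions that deal with UTF-8 input, and then redefines each flag as 1U shifted left by its bit position. There should be no change in the code generated by this. (Note: per the diff below, three flags do change value: UTF8_NO_CONFIDENCE_IN_CURLEN_ moves from 0x10000 to bit 18 (0x40000), UTF8_DIE_IF_MALFORMED from 0x20000 to bit 16 (0x10000), and UTF8_FORCE_WARN_IF_MALFORMED from 0x40000 to bit 17 (0x20000); all other flags keep their previous values.)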
1 parent 67c0a57 commit 686c251

File tree

1 file changed

+61
-30
lines changed

1 file changed

+61
-30
lines changed

utf8.h

Lines changed: 61 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1175,27 +1175,56 @@ point's representation.
11751175

11761176
/* The ordering of these bits is important to a switch() statement in utf8.c
11771177
* for handling problems in converting UTF-8 to a UV */
1178-
#define UTF8_GOT_OVERFLOW 0x0001
1179-
#define UTF8_ALLOW_OVERFLOW UTF8_GOT_OVERFLOW
1178+
#define UTF8_GOT_OVERFLOW_BIT_POS_ 0
1179+
#define UTF8_GOT_EMPTY_BIT_POS_ 1
1180+
#define UTF8_GOT_CONTINUATION_BIT_POS_ 2
1181+
#define UTF8_GOT_SHORT_BIT_POS_ 3
1182+
#define UTF8_GOT_NON_CONTINUATION_BIT_POS_ 4
11801183

1181-
#define UTF8_GOT_EMPTY 0x0002 /* Allow a zero length string */
1182-
#define UTF8_ALLOW_EMPTY UTF8_GOT_EMPTY
1184+
#define UTF8_GOT_SURROGATE_BIT_POS_ 5
1185+
#define UTF8_WARN_SURROGATE_BIT_POS_ 6
1186+
1187+
#define UTF8_GOT_PERL_EXTENDED_BIT_POS_ 7
1188+
#define UTF8_WARN_PERL_EXTENDED_BIT_POS_ 8
1189+
1190+
#define UTF8_GOT_SUPER_BIT_POS_ 9
1191+
#define UTF8_WARN_SUPER_BIT_POS_ 10
1192+
1193+
#define UTF8_GOT_NONCHAR_BIT_POS_ 11
1194+
#define UTF8_WARN_NONCHAR_BIT_POS_ 12
1195+
1196+
#define UTF8_GOT_LONG_BIT_POS_ 13
1197+
#define UTF8_GOT_LONG_WITH_VALUE_BIT_POS_ 14
1198+
1199+
#define UTF8_CHECK_ONLY_BIT_POS_ 15
1200+
#define UTF8_DIE_IF_MALFORMED_BIT_POS_ 16
1201+
#define UTF8_FORCE_WARN_IF_MALFORMED_BIT_POS_ 17
1202+
1203+
#define UTF8_NO_CONFIDENCE_IN_CURLEN_BIT_POS_ 18
1204+
1205+
#define UTF8_GOT_OVERFLOW (1U << UTF8_GOT_OVERFLOW_BIT_POS_)
1206+
#define UTF8_ALLOW_OVERFLOW UTF8_GOT_OVERFLOW
1207+
1208+
/* Allow a zero length string */
1209+
#define UTF8_GOT_EMPTY (1U << UTF8_GOT_EMPTY_BIT_POS_)
1210+
#define UTF8_ALLOW_EMPTY UTF8_GOT_EMPTY
11831211

11841212
/* Allow first byte to be a continuation byte */
1185-
#define UTF8_GOT_CONTINUATION 0x0004
1186-
#define UTF8_ALLOW_CONTINUATION UTF8_GOT_CONTINUATION
1213+
#define UTF8_GOT_CONTINUATION (1U << UTF8_GOT_CONTINUATION_BIT_POS_)
1214+
#define UTF8_ALLOW_CONTINUATION UTF8_GOT_CONTINUATION
11871215

11881216
/* expecting more bytes than were available in the string */
1189-
#define UTF8_GOT_SHORT 0x0008
1190-
#define UTF8_ALLOW_SHORT UTF8_GOT_SHORT
1217+
#define UTF8_GOT_SHORT (1U << UTF8_GOT_SHORT_BIT_POS_)
1218+
#define UTF8_ALLOW_SHORT UTF8_GOT_SHORT
11911219

11921220
/* Unexpected non-continuation byte */
1193-
#define UTF8_GOT_NON_CONTINUATION 0x0010
1194-
#define UTF8_ALLOW_NON_CONTINUATION UTF8_GOT_NON_CONTINUATION
1221+
#define UTF8_GOT_NON_CONTINUATION (1U << UTF8_GOT_NON_CONTINUATION_BIT_POS_)
1222+
#define UTF8_ALLOW_NON_CONTINUATION UTF8_GOT_NON_CONTINUATION
11951223

1196-
#define UTF8_GOT_SURROGATE 0x0020 /* Unicode surrogates */
1197-
#define UTF8_DISALLOW_SURROGATE UTF8_GOT_SURROGATE
1198-
#define UTF8_WARN_SURROGATE 0x0040
1224+
/* Unicode surrogates */
1225+
#define UTF8_GOT_SURROGATE (1U << UTF8_GOT_SURROGATE_BIT_POS_)
1226+
#define UTF8_DISALLOW_SURROGATE UTF8_GOT_SURROGATE
1227+
#define UTF8_WARN_SURROGATE (1U << UTF8_WARN_SURROGATE_BIT_POS_)
11991228

12001229
/* The original UTF-8 standard did not define UTF-8 with start bytes of 0xFE or
12011230
* 0xFF, though UTF-EBCDIC did. This allowed both versions to represent code
@@ -1206,27 +1235,27 @@ point's representation.
12061235
* extensions, and not likely to be interchangeable with other languages. Note
12071236
* that on ASCII platforms, FE overflows a signed 32-bit word, and FF an
12081237
* unsigned one. */
1209-
#define UTF8_GOT_PERL_EXTENDED 0x0080
1210-
#define UTF8_DISALLOW_PERL_EXTENDED UTF8_GOT_PERL_EXTENDED
1211-
#define UTF8_WARN_PERL_EXTENDED 0x0100
1238+
#define UTF8_GOT_PERL_EXTENDED (1U << UTF8_GOT_PERL_EXTENDED_BIT_POS_)
1239+
#define UTF8_DISALLOW_PERL_EXTENDED UTF8_GOT_PERL_EXTENDED
1240+
#define UTF8_WARN_PERL_EXTENDED (1U << UTF8_WARN_PERL_EXTENDED_BIT_POS_)
12121241

12131242
/* Super-set of Unicode: code points above the legal max */
1214-
#define UTF8_GOT_SUPER 0x0200
1215-
#define UTF8_DISALLOW_SUPER UTF8_GOT_SUPER
1216-
#define UTF8_WARN_SUPER 0x0400
1243+
#define UTF8_GOT_SUPER (1U << UTF8_GOT_SUPER_BIT_POS_)
1244+
#define UTF8_DISALLOW_SUPER UTF8_GOT_SUPER
1245+
#define UTF8_WARN_SUPER (1U << UTF8_WARN_SUPER_BIT_POS_)
12171246

12181247
/* Unicode non-character code points */
1219-
#define UTF8_GOT_NONCHAR 0x0800
1220-
#define UTF8_DISALLOW_NONCHAR UTF8_GOT_NONCHAR
1221-
#define UTF8_WARN_NONCHAR 0x1000
1248+
#define UTF8_GOT_NONCHAR (1U << UTF8_GOT_NONCHAR_BIT_POS_)
1249+
#define UTF8_DISALLOW_NONCHAR UTF8_GOT_NONCHAR
1250+
#define UTF8_WARN_NONCHAR (1U << UTF8_WARN_NONCHAR_BIT_POS_)
12221251

12231252
/* Overlong sequence; i.e., the code point can be specified in fewer bytes.
12241253
* First one will convert the overlong to the REPLACEMENT CHARACTER; second
12251254
* will return what the overlong evaluates to */
1226-
#define UTF8_GOT_LONG 0x2000
1227-
#define UTF8_ALLOW_LONG UTF8_GOT_LONG
1228-
#define UTF8_GOT_LONG_WITH_VALUE 0x4000
1229-
#define UTF8_ALLOW_LONG_AND_ITS_VALUE UTF8_GOT_LONG_WITH_VALUE
1255+
#define UTF8_GOT_LONG (1U << UTF8_GOT_LONG_BIT_POS_)
1256+
#define UTF8_ALLOW_LONG UTF8_GOT_LONG
1257+
#define UTF8_GOT_LONG_WITH_VALUE (1U << UTF8_GOT_LONG_WITH_VALUE_BIT_POS_)
1258+
#define UTF8_ALLOW_LONG_AND_ITS_VALUE UTF8_GOT_LONG_WITH_VALUE
12301259

12311260
/* For back compat, these old names are misleading for overlongs and
12321261
* UTF_EBCDIC. */
@@ -1236,10 +1265,12 @@ point's representation.
12361265
#define UTF8_DISALLOW_FE_FF UTF8_DISALLOW_PERL_EXTENDED
12371266
#define UTF8_WARN_FE_FF UTF8_WARN_PERL_EXTENDED
12381267

1239-
#define UTF8_CHECK_ONLY 0x8000
1240-
#define UTF8_NO_CONFIDENCE_IN_CURLEN_ 0x10000 /* Internal core use only */
1241-
#define UTF8_DIE_IF_MALFORMED 0x20000
1242-
#define UTF8_FORCE_WARN_IF_MALFORMED 0x40000
1268+
#define UTF8_CHECK_ONLY (1U << UTF8_CHECK_ONLY_BIT_POS_)
1269+
#define UTF8_NO_CONFIDENCE_IN_CURLEN_ /* Internal core use only */ \
1270+
(1U << UTF8_NO_CONFIDENCE_IN_CURLEN_BIT_POS_)
1271+
#define UTF8_DIE_IF_MALFORMED (1U << UTF8_DIE_IF_MALFORMED_BIT_POS_)
1272+
#define UTF8_FORCE_WARN_IF_MALFORMED \
1273+
(1U <<UTF8_FORCE_WARN_IF_MALFORMED_BIT_POS_)
12431274

12441275
/* For backwards source compatibility. They do nothing, as the default now
12451276
* includes what they used to mean. The first one's meaning was to allow the

0 commit comments

Comments
 (0)