@@ -1128,45 +1128,28 @@ point's representation.
1128
1128
/* Largest code point we accept from external sources */
1129
1129
#define MAX_LEGAL_CP ((UV)IV_MAX)
1130
1130
1131
+ #define UTF8_ALLOW_OVERFLOW 0x0001
1132
+ #define UTF8_GOT_OVERFLOW UTF8_ALLOW_OVERFLOW
1133
+
1131
1134
#define UTF8_ALLOW_EMPTY 0x0002 /* Allow a zero length string */
1132
1135
#define UTF8_GOT_EMPTY UTF8_ALLOW_EMPTY
1133
1136
1134
1137
/* Allow first byte to be a continuation byte */
1135
1138
#define UTF8_ALLOW_CONTINUATION 0x0004
1136
1139
#define UTF8_GOT_CONTINUATION UTF8_ALLOW_CONTINUATION
1137
1140
1138
- /* Unexpected non-continuation byte */
1139
- #define UTF8_ALLOW_NON_CONTINUATION 0x0010
1140
- #define UTF8_GOT_NON_CONTINUATION UTF8_ALLOW_NON_CONTINUATION
1141
-
1142
1141
/* Expecting more bytes than were available in the string */
1143
1142
#define UTF8_ALLOW_SHORT 0x0008
1144
1143
#define UTF8_GOT_SHORT UTF8_ALLOW_SHORT
1145
1144
1146
- /* Overlong sequence; i.e., the code point can be specified in fewer bytes.
1147
- * First one will convert the overlong to the REPLACEMENT CHARACTER; second
1148
- * will return what the overlong evaluates to */
1149
- #define UTF8_ALLOW_LONG 0x2000
1150
- #define UTF8_ALLOW_LONG_AND_ITS_VALUE (UTF8_ALLOW_LONG|0x4000)
1151
- #define UTF8_GOT_LONG UTF8_ALLOW_LONG
1152
-
1153
- #define UTF8_ALLOW_OVERFLOW 0x0001
1154
- #define UTF8_GOT_OVERFLOW UTF8_ALLOW_OVERFLOW
1145
+ /* Unexpected non-continuation byte */
1146
+ #define UTF8_ALLOW_NON_CONTINUATION 0x0010
1147
+ #define UTF8_GOT_NON_CONTINUATION UTF8_ALLOW_NON_CONTINUATION
1155
1148
1156
1149
#define UTF8_DISALLOW_SURROGATE 0x0020 /* Unicode surrogates */
1157
1150
#define UTF8_GOT_SURROGATE UTF8_DISALLOW_SURROGATE
1158
1151
#define UTF8_WARN_SURROGATE 0x0040
1159
1152
1160
- /* Unicode non-character code points */
1161
- #define UTF8_DISALLOW_NONCHAR 0x0800
1162
- #define UTF8_GOT_NONCHAR UTF8_DISALLOW_NONCHAR
1163
- #define UTF8_WARN_NONCHAR 0x1000
1164
-
1165
- /* Super-set of Unicode: code points above the legal max */
1166
- #define UTF8_DISALLOW_SUPER 0x0200
1167
- #define UTF8_GOT_SUPER UTF8_DISALLOW_SUPER
1168
- #define UTF8_WARN_SUPER 0x0400
1169
-
1170
1153
/* The original UTF-8 standard did not define UTF-8 with start bytes of 0xFE or
1171
1154
* 0xFF, though UTF-EBCDIC did. This allowed both versions to represent code
1172
1155
* points up to 2 ** 31 - 1. Perl extends UTF-8 so that 0xFE and 0xFF are
@@ -1180,6 +1163,23 @@ point's representation.
1180
1163
#define UTF8_GOT_PERL_EXTENDED UTF8_DISALLOW_PERL_EXTENDED
1181
1164
#define UTF8_WARN_PERL_EXTENDED 0x0100
1182
1165
1166
+ /* Super-set of Unicode: code points above the legal max */
1167
+ #define UTF8_DISALLOW_SUPER 0x0200
1168
+ #define UTF8_GOT_SUPER UTF8_DISALLOW_SUPER
1169
+ #define UTF8_WARN_SUPER 0x0400
1170
+
1171
+ /* Unicode non-character code points */
1172
+ #define UTF8_DISALLOW_NONCHAR 0x0800
1173
+ #define UTF8_GOT_NONCHAR UTF8_DISALLOW_NONCHAR
1174
+ #define UTF8_WARN_NONCHAR 0x1000
1175
+
1176
+ /* Overlong sequence; i.e., the code point can be specified in fewer bytes.
1177
+ * First one will convert the overlong to the REPLACEMENT CHARACTER; second
1178
+ * will return what the overlong evaluates to */
1179
+ #define UTF8_ALLOW_LONG 0x2000
1180
+ #define UTF8_ALLOW_LONG_AND_ITS_VALUE (UTF8_ALLOW_LONG|0x4000)
1181
+ #define UTF8_GOT_LONG UTF8_ALLOW_LONG
1182
+
1183
1183
/* For back compat, these old names are misleading for overlongs and
1184
1184
* UTF-EBCDIC. */
1185
1185
#define UTF8_DISALLOW_ABOVE_31_BIT UTF8_DISALLOW_PERL_EXTENDED
0 commit comments