@@ -742,7 +742,7 @@ BOOL set_bit;
742742
743743if (ptype == PT_ANY )
744744 {
745- if (!negated ) memset (classbits , 0xff , 32 * sizeof ( uint8_t ) );
745+ if (!negated ) memset (classbits , 0xff , 32 );
746746 return ;
747747 }
748748
@@ -1094,14 +1094,14 @@ if (utf)
10941094 }
10951095
10961096class_uchardata = code + LINK_SIZE + 2 ; /* For XCLASS items */
1097- #endif
1097+ #endif /* SUPPORT_WIDE_CHARS */
10981098
10991099/* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map
11001100in a temporary bit of memory, in case the class contains fewer than two
110111018-bit characters because in that case the compiled code doesn't use the bit
11021102map. */
11031103
1104- memset (classbits , 0 , 32 * sizeof ( uint8_t ) );
1104+ memset (classbits , 0 , 32 );
11051105
11061106/* Process items until end_ptr is reached. */
11071107
@@ -1111,7 +1111,7 @@ while (TRUE)
11111111 BOOL local_negate ;
11121112 int posix_class ;
11131113 int taboffset , tabopt ;
1114- uint8_t pbits [ 32 ] ;
1114+ class_bits_storage pbits ;
11151115 uint32_t escape , c ;
11161116
11171117 /* Handle POSIX classes such as [:alpha:] etc. */
@@ -1123,6 +1123,8 @@ while (TRUE)
11231123 local_negate = (meta == META_POSIX_NEG );
11241124 posix_class = * (pptr ++ );
11251125
1126+ /* XXX !?!? Why set this back to FALSE if an item with local_negate=FALSE comes AFTER one with local_negate=TRUE?
1127+ e.g. [[:^digit:][:alpha:]] */
11261128 should_flip_negation = local_negate ; /* Note negative special */
11271129
11281130 /* If matching is caseless, upper and lower are converted to alpha.
@@ -1151,7 +1153,7 @@ while (TRUE)
11511153 ptype = (posix_class == PC_GRAPH )? PT_PXGRAPH :
11521154 (posix_class == PC_PRINT )? PT_PXPRINT : PT_PXPUNCT ;
11531155
1154- PRIV (update_classbits )(ptype , 0 , !local_negate , classbits );
1156+ PRIV (update_classbits )(ptype , 0 , !local_negate /* XXX what? why flipped? */ , classbits );
11551157
11561158 if ((xclass_props & XCLASS_HIGH_ANY ) == 0 )
11571159 {
@@ -1199,8 +1201,7 @@ while (TRUE)
11991201
12001202 /* Copy in the first table (always present) */
12011203
1202- memcpy (pbits , cbits + PRIV (posix_class_maps )[posix_class ],
1203- 32 * sizeof (uint8_t ));
1204+ memcpy (pbits .classbits , cbits + PRIV (posix_class_maps )[posix_class ], 32 );
12041205
12051206 /* If there is a second table, add or remove it as required. */
12061207
@@ -1210,27 +1211,35 @@ while (TRUE)
12101211 if (taboffset >= 0 )
12111212 {
12121213 if (tabopt >= 0 )
1213- for (int i = 0 ; i < 32 ; i ++ ) pbits [i ] |= cbits [(int )i + taboffset ];
1214+ for (int i = 0 ; i < 32 ; i ++ )
1215+ pbits .classbits [i ] |= cbits [(int )i + taboffset ];
12141216 else
1215- for (int i = 0 ; i < 32 ; i ++ ) pbits [i ] &= ~cbits [(int )i + taboffset ];
1217+ for (int i = 0 ; i < 32 ; i ++ )
1218+ pbits .classbits [i ] &= ~cbits [(int )i + taboffset ];
12161219 }
12171220
12181221 /* Now see if we need to remove any special characters. An option
12191222 value of 1 removes vertical space and 2 removes underscore. */
12201223
12211224 if (tabopt < 0 ) tabopt = - tabopt ;
1222- if (tabopt == 1 ) pbits [1 ] &= ~0x3c ;
1223- else if (tabopt == 2 ) pbits [11 ] &= 0x7f ;
1225+ if (tabopt == 1 ) pbits . classbits [1 ] &= ~0x3c ;
1226+ else if (tabopt == 2 ) pbits . classbits [11 ] &= 0x7f ;
12241227
12251228 /* Add the POSIX table or its complement into the main table that is
12261229 being built and we are done. */
12271230
1228- if (local_negate )
1229- for (int i = 0 ; i < 32 ; i ++ ) classbits [i ] |= (uint8_t )(~pbits [i ]);
1230- else
1231- for (int i = 0 ; i < 32 ; i ++ ) classbits [i ] |= pbits [i ];
1231+ {
1232+ uint32_t * classwords = cb -> classbits .classwords ;
12321233
1234+ if (local_negate )
1235+ for (int i = 0 ; i < 8 ; i ++ )
1236+ classwords [i ] |= (uint32_t )(~pbits .classwords [i ]); /* complement the whole 32-bit word; a uint8_t cast would keep only its low byte */
1237+ else
1238+ for (int i = 0 ; i < 8 ; i ++ )
1239+ classwords [i ] |= pbits .classwords [i ];
1240+ }
1241+
1242+ #ifdef SUPPORT_WIDE_CHARS
12341243 /* Every class contains at least one < 256 character. */
12351244 xclass_props |= XCLASS_HAS_8BIT_CHARS ;
12361245#endif
@@ -1239,8 +1248,8 @@ while (TRUE)
12391248 /* Other than POSIX classes, the only items we should encounter are
12401249 \d-type escapes and literal characters (possibly as ranges). */
12411250 case META_BIGVALUE :
1242- meta = * (pptr ++ );
1243- break ;
1251+ meta = * (pptr ++ );
1252+ break ;
12441253
12451254 case META_ESCAPE :
12461255 escape = META_DATA (meta );
@@ -1293,7 +1302,8 @@ while (TRUE)
12931302 case ESC_h :
12941303#if PCRE2_CODE_UNIT_WIDTH == 8
12951304#ifdef SUPPORT_UNICODE
1296- if (cranges != NULL ) break ;
1305+ if (utf ) { PCRE2_ASSERT (cranges != NULL ); }
1306+ else
12971307#endif
12981308 add_list_to_class (options & ~PCRE2_CASELESS ,
12991309 cb , PRIV (hspace_list ));
@@ -1303,9 +1313,11 @@ while (TRUE)
13031313 break ;
13041314
13051315 case ESC_H :
1316+ // XXX WHY NO should_flip_negation HERE?
13061317#if PCRE2_CODE_UNIT_WIDTH == 8
13071318#ifdef SUPPORT_UNICODE
1308- if (cranges != NULL ) break ;
1319+ if (utf ) { PCRE2_ASSERT (cranges != NULL ); }
1320+ else
13091321#endif
13101322 add_not_list_to_class (options & ~PCRE2_CASELESS ,
13111323 cb , PRIV (hspace_list ));
@@ -1317,7 +1329,8 @@ while (TRUE)
13171329 case ESC_v :
13181330#if PCRE2_CODE_UNIT_WIDTH == 8
13191331#ifdef SUPPORT_UNICODE
1320- if (cranges != NULL ) break ;
1332+ if (utf ) { PCRE2_ASSERT (cranges != NULL ); }
1333+ else
13211334#endif
13221335 add_list_to_class (options & ~PCRE2_CASELESS ,
13231336 cb , PRIV (vspace_list ));
@@ -1327,9 +1340,11 @@ while (TRUE)
13271340 break ;
13281341
13291342 case ESC_V :
1343+ // XXX WHY NO should_flip_negation HERE?
13301344#if PCRE2_CODE_UNIT_WIDTH == 8
13311345#ifdef SUPPORT_UNICODE
1332- if (cranges != NULL ) break ;
1346+ if (utf ) { PCRE2_ASSERT (cranges != NULL ); }
1347+ else
13331348#endif
13341349 add_not_list_to_class (options & ~PCRE2_CASELESS ,
13351350 cb , PRIV (vspace_list ));
@@ -1352,7 +1367,7 @@ while (TRUE)
13521367 if (ptype == PT_ANY )
13531368 {
13541369#if PCRE2_CODE_UNIT_WIDTH == 8
1355- if (!utf && escape == ESC_p ) memset (classbits , 0xff , 32 * sizeof ( uint8_t ) );
1370+ if (!utf && escape == ESC_p ) memset (classbits , 0xff , 32 );
13561371#endif
13571372 continue ;
13581373 }
@@ -1368,6 +1383,8 @@ while (TRUE)
13681383 pdata = 0 ;
13691384 }
13701385
1386+ // XXX wow! so we update the classbits, but then throw them away!?!? (for compactness...?)
1387+ // actually... did I change this?
13711388 PRIV (update_classbits )(ptype , pdata ,
13721389 (escape == ESC_P ), classbits );
13731390
@@ -1390,14 +1407,17 @@ while (TRUE)
13901407
13911408#ifdef SUPPORT_WIDE_CHARS
13921409 /* Every non-property class contains at least one < 256 character. */
1393- xclass_props |= XCLASS_HAS_8BIT_CHARS ;
1410+ xclass_props |= XCLASS_HAS_8BIT_CHARS ; // XXX << !!!!!!! OK so this really isn't a guarantee, is it...
13941411#endif
13951412 /* End handling \d-type escapes */
13961413 continue ;
13971414
1415+ // XXX so, do we guarantee at least that if we *don't* have any 8-bit chars, and we *don't* have XCLASS_HAS_PROPS, then it doesn't match any low values?
1416+
13981417 CLASS_END_CASES (meta )
13991418 /* Literals. */
14001419 if (meta < META_END ) break ;
1420+ /* Non-literals: end of class contents. */
14011421 goto END_PROCESSING ;
14021422 }
14031423
@@ -1594,7 +1614,7 @@ if (cranges != NULL)
15941614 cb -> cx -> memctl .free (cranges , cb -> cx -> memctl .memory_data );
15951615 }
15961616 }
1597- #endif
1617+ #endif /* SUPPORT_WIDE_CHARS */
15981618
15991619/* If there are characters with values > 255, or Unicode property settings
16001620(\p or \P), we have to compile an extended class, with its own opcode,
0 commit comments