Skip to content

Commit 163f220

Browse files
committed
Personal comments as I read through...
1 parent fa6b204 commit 163f220

File tree

2 files changed

+46
-26
lines changed

2 files changed

+46
-26
lines changed

src/pcre2_compile.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6147,7 +6147,7 @@ for (;; pptr++)
61476147
else
61486148
{
61496149
*code++ = OP_CLASS;
6150-
memset(code, 0, 32 * sizeof(uint8_t));
6150+
memset(code, 0, 32);
61516151
code += 32 / sizeof(PCRE2_UCHAR);
61526152
}
61536153

src/pcre2_compile_class.c

Lines changed: 45 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -742,7 +742,7 @@ BOOL set_bit;
742742

743743
if (ptype == PT_ANY)
744744
{
745-
if (!negated) memset(classbits, 0xff, 32 * sizeof(uint8_t));
745+
if (!negated) memset(classbits, 0xff, 32);
746746
return;
747747
}
748748

@@ -1094,14 +1094,14 @@ if (utf)
10941094
}
10951095

10961096
class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */
1097-
#endif
1097+
#endif /* SUPPORT_WIDE_CHARS */
10981098

10991099
/* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map
11001100
in a temporary bit of memory, in case the class contains fewer than two
11011101
8-bit characters because in that case the compiled code doesn't use the bit
11021102
map. */
11031103

1104-
memset(classbits, 0, 32 * sizeof(uint8_t));
1104+
memset(classbits, 0, 32);
11051105

11061106
/* Process items until end_ptr is reached. */
11071107

@@ -1111,7 +1111,7 @@ while (TRUE)
11111111
BOOL local_negate;
11121112
int posix_class;
11131113
int taboffset, tabopt;
1114-
uint8_t pbits[32];
1114+
class_bits_storage pbits;
11151115
uint32_t escape, c;
11161116

11171117
/* Handle POSIX classes such as [:alpha:] etc. */
@@ -1123,6 +1123,8 @@ while (TRUE)
11231123
local_negate = (meta == META_POSIX_NEG);
11241124
posix_class = *(pptr++);
11251125

1126+
/* XXX !?!? why set this back to false, if you have local_negate=FALSE AFTER local_negate=TRUE?
1127+
e.g. [[:^digit:][:alpha:]] */
11261128
should_flip_negation = local_negate; /* Note negative special */
11271129

11281130
/* If matching is caseless, upper and lower are converted to alpha.
@@ -1151,7 +1153,7 @@ while (TRUE)
11511153
ptype = (posix_class == PC_GRAPH)? PT_PXGRAPH :
11521154
(posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT;
11531155

1154-
PRIV(update_classbits)(ptype, 0, !local_negate, classbits);
1156+
PRIV(update_classbits)(ptype, 0, !local_negate /* XXX what? why flipped? */, classbits);
11551157

11561158
if ((xclass_props & XCLASS_HIGH_ANY) == 0)
11571159
{
@@ -1199,8 +1201,7 @@ while (TRUE)
11991201

12001202
/* Copy in the first table (always present) */
12011203

1202-
memcpy(pbits, cbits + PRIV(posix_class_maps)[posix_class],
1203-
32 * sizeof(uint8_t));
1204+
memcpy(pbits.classbits, cbits + PRIV(posix_class_maps)[posix_class], 32);
12041205

12051206
/* If there is a second table, add or remove it as required. */
12061207

@@ -1210,27 +1211,35 @@ while (TRUE)
12101211
if (taboffset >= 0)
12111212
{
12121213
if (tabopt >= 0)
1213-
for (int i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset];
1214+
for (int i = 0; i < 32; i++)
1215+
pbits.classbits[i] |= cbits[(int)i + taboffset];
12141216
else
1215-
for (int i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset];
1217+
for (int i = 0; i < 32; i++)
1218+
pbits.classbits[i] &= ~cbits[(int)i + taboffset];
12161219
}
12171220

12181221
/* Now see if we need to remove any special characters. An option
12191222
value of 1 removes vertical space and 2 removes underscore. */
12201223

12211224
if (tabopt < 0) tabopt = -tabopt;
1222-
if (tabopt == 1) pbits[1] &= ~0x3c;
1223-
else if (tabopt == 2) pbits[11] &= 0x7f;
1225+
if (tabopt == 1) pbits.classbits[1] &= ~0x3c;
1226+
else if (tabopt == 2) pbits.classbits[11] &= 0x7f;
12241227

12251228
/* Add the POSIX table or its complement into the main table that is
12261229
being built and we are done. */
12271230

1228-
if (local_negate)
1229-
for (int i = 0; i < 32; i++) classbits[i] |= (uint8_t)(~pbits[i]);
1230-
else
1231-
for (int i = 0; i < 32; i++) classbits[i] |= pbits[i];
1231+
{
1232+
uint32_t *classwords = cb->classbits.classwords;
12321233

1233-
#ifdef SUPPORT_UNICODE
1234+
if (local_negate)
1235+
for (int i = 0; i < 8; i++)
1236+
classwords[i] |= (uint8_t)(~pbits.classwords[i]);
1237+
else
1238+
for (int i = 0; i < 8; i++)
1239+
classwords[i] |= pbits.classwords[i];
1240+
}
1241+
1242+
#ifdef SUPPORT_WIDE_CHARS
12341243
/* Every class contains at least one < 256 character. */
12351244
xclass_props |= XCLASS_HAS_8BIT_CHARS;
12361245
#endif
@@ -1239,8 +1248,8 @@ while (TRUE)
12391248
/* Other than POSIX classes, the only items we should encounter are
12401249
\d-type escapes and literal characters (possibly as ranges). */
12411250
case META_BIGVALUE:
1242-
meta = *(pptr++);
1243-
break;
1251+
meta = *(pptr++);
1252+
break;
12441253

12451254
case META_ESCAPE:
12461255
escape = META_DATA(meta);
@@ -1293,7 +1302,8 @@ while (TRUE)
12931302
case ESC_h:
12941303
#if PCRE2_CODE_UNIT_WIDTH == 8
12951304
#ifdef SUPPORT_UNICODE
1296-
if (cranges != NULL) break;
1305+
if (utf) { PCRE2_ASSERT(cranges != NULL); }
1306+
else
12971307
#endif
12981308
add_list_to_class(options & ~PCRE2_CASELESS,
12991309
cb, PRIV(hspace_list));
@@ -1303,9 +1313,11 @@ while (TRUE)
13031313
break;
13041314

13051315
case ESC_H:
1316+
// XXX WHY NO should_flip_negation HERE?
13061317
#if PCRE2_CODE_UNIT_WIDTH == 8
13071318
#ifdef SUPPORT_UNICODE
1308-
if (cranges != NULL) break;
1319+
if (utf) { PCRE2_ASSERT(cranges != NULL); }
1320+
else
13091321
#endif
13101322
add_not_list_to_class(options & ~PCRE2_CASELESS,
13111323
cb, PRIV(hspace_list));
@@ -1317,7 +1329,8 @@ while (TRUE)
13171329
case ESC_v:
13181330
#if PCRE2_CODE_UNIT_WIDTH == 8
13191331
#ifdef SUPPORT_UNICODE
1320-
if (cranges != NULL) break;
1332+
if (utf) { PCRE2_ASSERT(cranges != NULL); }
1333+
else
13211334
#endif
13221335
add_list_to_class(options & ~PCRE2_CASELESS,
13231336
cb, PRIV(vspace_list));
@@ -1327,9 +1340,11 @@ while (TRUE)
13271340
break;
13281341

13291342
case ESC_V:
1343+
// XXX WHY NO should_flip_negation HERE?
13301344
#if PCRE2_CODE_UNIT_WIDTH == 8
13311345
#ifdef SUPPORT_UNICODE
1332-
if (cranges != NULL) break;
1346+
if (utf) { PCRE2_ASSERT(cranges != NULL); }
1347+
else
13331348
#endif
13341349
add_not_list_to_class(options & ~PCRE2_CASELESS,
13351350
cb, PRIV(vspace_list));
@@ -1352,7 +1367,7 @@ while (TRUE)
13521367
if (ptype == PT_ANY)
13531368
{
13541369
#if PCRE2_CODE_UNIT_WIDTH == 8
1355-
if (!utf && escape == ESC_p) memset(classbits, 0xff, 32 * sizeof(uint8_t));
1370+
if (!utf && escape == ESC_p) memset(classbits, 0xff, 32);
13561371
#endif
13571372
continue;
13581373
}
@@ -1368,6 +1383,8 @@ while (TRUE)
13681383
pdata = 0;
13691384
}
13701385

1386+
// XXX wow! so we update the classbits, but then throw them away!?!? (for compactness...?)
1387+
// actually... did I change this?
13711388
PRIV(update_classbits)(ptype, pdata,
13721389
(escape == ESC_P), classbits);
13731390

@@ -1390,14 +1407,17 @@ while (TRUE)
13901407

13911408
#ifdef SUPPORT_WIDE_CHARS
13921409
/* Every non-property class contains at least one < 256 character. */
1393-
xclass_props |= XCLASS_HAS_8BIT_CHARS;
1410+
xclass_props |= XCLASS_HAS_8BIT_CHARS; // XXX << !!!!!!! OK so this really isn't a guarantee, is it...
13941411
#endif
13951412
/* End handling \d-type escapes */
13961413
continue;
13971414

1415+
// XXX so, do we at least guarantee that if we *don't* have any 8-bit chars, and we *don't* have XCLASS_HAS_PROPS set, then the class doesn't match any low values?
1416+
13981417
CLASS_END_CASES(meta)
13991418
/* Literals. */
14001419
if (meta < META_END) break;
1420+
/* Non-literals: end of class contents. */
14011421
goto END_PROCESSING;
14021422
}
14031423

@@ -1594,7 +1614,7 @@ if (cranges != NULL)
15941614
cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
15951615
}
15961616
}
1597-
#endif
1617+
#endif /* SUPPORT_WIDE_CHARS */
15981618

15991619
/* If there are characters with values > 255, or Unicode property settings
16001620
(\p or \P), we have to compile an extended class, with its own opcode,

0 commit comments

Comments
 (0)