@@ -302,94 +302,190 @@ private static string GetTimeoutExpression(int matchTimeout) =>
302
302
"Regex.InfiniteMatchTimeout" :
303
303
$ "TimeSpan.FromMilliseconds({ matchTimeout . ToString ( CultureInfo . InvariantCulture ) } )";
304
304
305
+ private const string IsBoundary = nameof ( IsBoundary ) ;
306
+ private const string IsECMABoundary = nameof ( IsECMABoundary ) ;
307
+ private const string IsWordChar = nameof ( IsWordChar ) ;
308
+ private const string IsBoundaryWordChar = nameof ( IsBoundaryWordChar ) ;
309
+ private const string IsPostWordCharBoundary = nameof ( IsPostWordCharBoundary ) ;
310
+ private const string IsPreWordCharBoundary = nameof ( IsPreWordCharBoundary ) ;
311
+ private const string IsECMABoundaryWordChar = nameof ( IsECMABoundaryWordChar ) ;
312
+ private const string WordCategoriesMask = nameof ( WordCategoriesMask ) ;
313
+ private const string WordCharBitmap = nameof ( WordCharBitmap ) ;
314
+
315
+ private static void AddWordCharHelpersSupport ( Dictionary < string , string [ ] > requiredHelpers )
316
+ {
317
+ const string WordCharHelpersSupport = nameof ( WordCharHelpersSupport ) ;
318
+ if ( ! requiredHelpers . ContainsKey ( WordCharHelpersSupport ) )
319
+ {
320
+ requiredHelpers . Add ( WordCharHelpersSupport ,
321
+ [
322
+ "/// <summary>Provides a mask of Unicode categories that combine to form [\\ w].</summary>" ,
323
+ $ "private const int { WordCategoriesMask } =",
324
+ " 1 << (int)UnicodeCategory.UppercaseLetter |" ,
325
+ " 1 << (int)UnicodeCategory.LowercaseLetter |" ,
326
+ " 1 << (int)UnicodeCategory.TitlecaseLetter |" ,
327
+ " 1 << (int)UnicodeCategory.ModifierLetter |" ,
328
+ " 1 << (int)UnicodeCategory.OtherLetter |" ,
329
+ " 1 << (int)UnicodeCategory.NonSpacingMark |" ,
330
+ " 1 << (int)UnicodeCategory.DecimalDigitNumber |" ,
331
+ " 1 << (int)UnicodeCategory.ConnectorPunctuation;" ,
332
+ "" ,
333
+ "/// <summary>Gets a bitmap for whether each character 0 through 127 is in [\\ w]</summary>" ,
334
+ $ "private static ReadOnlySpan<byte> { WordCharBitmap } => new byte[]",
335
+ "{" ,
336
+ " 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03," ,
337
+ " 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07" ,
338
+ "};" ,
339
+ ] ) ;
340
+ }
341
+ }
342
+
305
343
/// <summary>Adds the IsWordChar helper to the required helpers collection.</summary>
306
344
private static void AddIsWordCharHelper ( Dictionary < string , string [ ] > requiredHelpers )
307
345
{
308
- const string IsWordChar = nameof ( IsWordChar ) ;
309
346
if ( ! requiredHelpers . ContainsKey ( IsWordChar ) )
310
347
{
311
348
requiredHelpers . Add ( IsWordChar ,
312
349
[
313
- "/// <summary>Determines whether the character is part of the [\\ w] set.</summary>" ,
314
- "[MethodImpl(MethodImplOptions.AggressiveInlining)]" ,
315
- "internal static bool IsWordChar(char ch)" ,
316
- "{" ,
317
- " // Mask of Unicode categories that combine to form [\\ w]" ,
318
- " const int WordCategoriesMask =" ,
319
- " 1 << (int)UnicodeCategory.UppercaseLetter |" ,
320
- " 1 << (int)UnicodeCategory.LowercaseLetter |" ,
321
- " 1 << (int)UnicodeCategory.TitlecaseLetter |" ,
322
- " 1 << (int)UnicodeCategory.ModifierLetter |" ,
323
- " 1 << (int)UnicodeCategory.OtherLetter |" ,
324
- " 1 << (int)UnicodeCategory.NonSpacingMark |" ,
325
- " 1 << (int)UnicodeCategory.DecimalDigitNumber |" ,
326
- " 1 << (int)UnicodeCategory.ConnectorPunctuation;" ,
327
- "" ,
328
- " // Bitmap for whether each character 0 through 127 is in [\\ w]" ,
329
- " ReadOnlySpan<byte> ascii = new byte[]" ,
330
- " {" ,
331
- " 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03," ,
332
- " 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07" ,
333
- " };" ,
334
- "" ,
335
- " // If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category." ,
336
- " int chDiv8 = ch >> 3;" ,
337
- " return (uint)chDiv8 < (uint)ascii.Length ?" ,
338
- " (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :" ,
339
- " (WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0;" ,
340
- "}" ,
350
+ $ "/// <summary>Determines whether the character is part of the [\\ w] set.</summary>",
351
+ $ "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
352
+ $ "internal static bool { IsWordChar } (char ch)",
353
+ $ "{{",
354
+ $ " // If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.",
355
+ $ " ReadOnlySpan<byte> ascii = { WordCharBitmap } ;",
356
+ $ " int chDiv8 = ch >> 3;",
357
+ $ " return (uint)chDiv8 < (uint)ascii.Length ?",
358
+ $ " (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :",
359
+ $ " ({ WordCategoriesMask } & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0;",
360
+ $ "}}",
361
+ ] ) ;
362
+
363
+ AddWordCharHelpersSupport ( requiredHelpers ) ;
364
+ }
365
+ }
366
+
367
+ /// <summary>Adds the IsBoundaryWordChar helper to the required helpers collection.</summary>
368
+ private static void AddIsBoundaryWordCharHelper ( Dictionary < string , string [ ] > requiredHelpers )
369
+ {
370
+ if ( ! requiredHelpers . ContainsKey ( IsBoundaryWordChar ) )
371
+ {
372
+ requiredHelpers . Add ( IsBoundaryWordChar ,
373
+ [
374
+ $ "/// <summary>Determines whether the specified index is a boundary word character.</summary>",
375
+ $ "/// <remarks>This is the same as \\ w plus U+200C ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER.</remarks>",
376
+ $ "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
377
+ $ "internal static bool { IsBoundaryWordChar } (char ch)",
378
+ $ "{{",
379
+ $ " ReadOnlySpan<byte> ascii = { WordCharBitmap } ;",
380
+ $ " int chDiv8 = ch >> 3;",
381
+ $ " return (uint)chDiv8 < (uint)ascii.Length ?",
382
+ $ " (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :",
383
+ $ " (({ WordCategoriesMask } & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0) || (ch is '\u200C ' or '\u200D ');",
384
+ $ "}}",
385
+ ] ) ;
386
+
387
+ AddWordCharHelpersSupport ( requiredHelpers ) ;
388
+ }
389
+ }
390
+
391
+ /// <summary>Adds the IsECMABoundaryWordChar helper to the required helpers collection.</summary>
392
+ private static void AddIsECMABoundaryWordCharHelper ( Dictionary < string , string [ ] > requiredHelpers )
393
+ {
394
+ if ( ! requiredHelpers . ContainsKey ( IsECMABoundaryWordChar ) )
395
+ {
396
+ requiredHelpers . Add ( IsECMABoundaryWordChar ,
397
+ [
398
+ $ "/// <summary>Determines whether the specified index is a boundary (ECMAScript) word character.</summary>",
399
+ $ "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
400
+ $ "internal static bool { IsECMABoundaryWordChar } (char ch) =>",
401
+ $ " char.IsAsciiLetterOrDigit(ch) ||",
402
+ $ " ch is '_' or '\\ u0130'; // latin capital letter I with dot above",
341
403
] ) ;
342
404
}
343
405
}
344
406
345
407
/// <summary>Adds the IsBoundary helper to the required helpers collection.</summary>
346
408
private static void AddIsBoundaryHelper ( Dictionary < string , string [ ] > requiredHelpers , bool checkOverflow )
347
409
{
348
- const string IsBoundary = nameof ( IsBoundary ) ;
349
410
if ( ! requiredHelpers . ContainsKey ( IsBoundary ) )
350
411
{
351
412
string uncheckedKeyword = checkOverflow ? "unchecked" : "" ;
352
413
requiredHelpers . Add ( IsBoundary ,
353
414
[
354
415
$ "/// <summary>Determines whether the specified index is a boundary.</summary>",
355
416
$ "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
356
- $ "internal static bool IsBoundary(ReadOnlySpan<char> inputSpan, int index)",
417
+ $ "internal static bool { IsBoundary } (ReadOnlySpan<char> inputSpan, int index)",
357
418
$ "{{",
358
419
$ " int indexMinus1 = index - 1;",
359
- $ " return { uncheckedKeyword } ((uint)indexMinus1 < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[indexMinus1])) !=",
360
- $ " { uncheckedKeyword } ((uint)index < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[index]));",
361
- $ "" ,
362
- $ " static bool IsBoundaryWordChar(char ch) => IsWordChar(ch) || (ch == '\\ u200C' | ch == '\\ u200D');",
420
+ $ " return { uncheckedKeyword } ((uint)indexMinus1 < (uint)inputSpan.Length && { IsBoundaryWordChar } (inputSpan[indexMinus1])) !=",
421
+ $ " { uncheckedKeyword } ((uint)index < (uint)inputSpan.Length && { IsBoundaryWordChar } (inputSpan[index]));",
363
422
$ "}}",
364
423
] ) ;
365
424
366
- AddIsWordCharHelper ( requiredHelpers ) ;
425
+ AddIsBoundaryWordCharHelper ( requiredHelpers ) ;
426
+ }
427
+ }
428
+
429
+ /// <summary>Adds the IsPreWordCharBoundary helper to the required helpers collection.</summary>
430
+ private static void AddIsPreWordCharBoundaryHelper ( Dictionary < string , string [ ] > requiredHelpers , bool checkOverflow )
431
+ {
432
+ if ( ! requiredHelpers . ContainsKey ( IsPreWordCharBoundary ) )
433
+ {
434
+ string uncheckedKeyword = checkOverflow ? "unchecked" : "" ;
435
+ requiredHelpers . Add ( IsPreWordCharBoundary ,
436
+ [
437
+ $ "/// <summary>Determines whether the specified index is a boundary.</summary>",
438
+ $ "/// <remarks>This variant is only employed when the subsequent character will separately be validated as a word character.</remarks>",
439
+ $ "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
440
+ $ "internal static bool { IsPreWordCharBoundary } (ReadOnlySpan<char> inputSpan, int index)",
441
+ $ "{{",
442
+ $ " int indexMinus1 = index - 1;",
443
+ $ " return { uncheckedKeyword } ((uint)indexMinus1 >= (uint)inputSpan.Length || !{ IsBoundaryWordChar } (inputSpan[indexMinus1]));",
444
+ $ "}}",
445
+ ] ) ;
446
+
447
+ AddIsBoundaryWordCharHelper ( requiredHelpers ) ;
448
+ }
449
+ }
450
+
451
+ /// <summary>Adds the IsPostWordCharBoundary helper to the required helpers collection.</summary>
452
+ private static void AddIsPostWordCharBoundaryHelper ( Dictionary < string , string [ ] > requiredHelpers , bool checkOverflow )
453
+ {
454
+ if ( ! requiredHelpers . ContainsKey ( IsPostWordCharBoundary ) )
455
+ {
456
+ string uncheckedKeyword = checkOverflow ? "unchecked" : "" ;
457
+ requiredHelpers . Add ( IsPostWordCharBoundary ,
458
+ [
459
+ $ "/// <summary>Determines whether the specified index is a boundary.</summary>",
460
+ $ "/// <remarks>This variant is only employed when the previous character has already been validated as a word character.</remarks>",
461
+ $ "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
462
+ $ "internal static bool { IsPostWordCharBoundary } (ReadOnlySpan<char> inputSpan, int index) =>",
463
+ $ " { uncheckedKeyword } ((uint)index >= (uint)inputSpan.Length || !{ IsBoundaryWordChar } (inputSpan[index]));",
464
+ ] ) ;
465
+
466
+ AddIsBoundaryWordCharHelper ( requiredHelpers ) ;
367
467
}
368
468
}
369
469
370
470
/// <summary>Adds the IsECMABoundary helper to the required helpers collection.</summary>
371
471
private static void AddIsECMABoundaryHelper ( Dictionary < string , string [ ] > requiredHelpers , bool checkOverflow )
372
472
{
373
- const string IsECMABoundary = nameof ( IsECMABoundary ) ;
374
473
if ( ! requiredHelpers . ContainsKey ( IsECMABoundary ) )
375
474
{
376
475
string uncheckedKeyword = checkOverflow ? "unchecked" : "" ;
377
476
requiredHelpers . Add ( IsECMABoundary ,
378
477
[
379
478
$ "/// <summary>Determines whether the specified index is a boundary (ECMAScript).</summary>",
380
479
$ "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
381
- $ "internal static bool IsECMABoundary(ReadOnlySpan<char> inputSpan, int index)",
480
+ $ "internal static bool { IsECMABoundary } (ReadOnlySpan<char> inputSpan, int index)",
382
481
$ "{{",
383
482
$ " int indexMinus1 = index - 1;",
384
- $ " return { uncheckedKeyword } ((uint)indexMinus1 < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[indexMinus1])) !=",
385
- $ " { uncheckedKeyword } ((uint)index < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[index]));",
386
- $ "" ,
387
- $ " static bool IsECMAWordChar(char ch) =>",
388
- $ " char.IsAsciiLetterOrDigit(ch) ||",
389
- $ " ch == '_' ||",
390
- $ " ch == '\\ u0130'; // latin capital letter I with dot above",
483
+ $ " return { uncheckedKeyword } ((uint)indexMinus1 < (uint)inputSpan.Length && { IsECMABoundaryWordChar } (inputSpan[indexMinus1])) !=",
484
+ $ " { uncheckedKeyword } ((uint)index < (uint)inputSpan.Length && { IsECMABoundaryWordChar } (inputSpan[index]));",
391
485
$ "}}",
392
486
] ) ;
487
+
488
+ AddIsECMABoundaryWordCharHelper ( requiredHelpers ) ;
393
489
}
394
490
}
395
491
@@ -3177,20 +3273,33 @@ void EmitBoundary(RegexNode node)
3177
3273
{
3178
3274
Debug . Assert ( node . Kind is RegexNodeKind . Boundary or RegexNodeKind . NonBoundary or RegexNodeKind . ECMABoundary or RegexNodeKind . NonECMABoundary , $ "Unexpected kind: { node . Kind } ") ;
3179
3275
3276
+ string negation = node . Kind is RegexNodeKind . Boundary or RegexNodeKind . ECMABoundary ? "!" : "" ;
3277
+
3180
3278
string call ;
3181
- if ( node . Kind is RegexNodeKind . Boundary or RegexNodeKind . NonBoundary )
3182
- {
3183
- call = node . Kind is RegexNodeKind . Boundary ?
3184
- $ "!{ HelpersTypeName } .IsBoundary" :
3185
- $ "{ HelpersTypeName } .IsBoundary";
3186
- AddIsBoundaryHelper ( requiredHelpers , checkOverflow ) ;
3187
- }
3188
- else
3279
+ switch ( node . Kind )
3189
3280
{
3190
- call = node . Kind is RegexNodeKind . ECMABoundary ?
3191
- $ "!{ HelpersTypeName } .IsECMABoundary" :
3192
- $ "{ HelpersTypeName } .IsECMABoundary";
3193
- AddIsECMABoundaryHelper ( requiredHelpers , checkOverflow ) ;
3281
+ case RegexNodeKind . Boundary or RegexNodeKind . NonBoundary :
3282
+ if ( node . IsKnownPrecededByWordChar ( ) )
3283
+ {
3284
+ call = $ "{ negation } { HelpersTypeName } .{ IsPostWordCharBoundary } ";
3285
+ AddIsPostWordCharBoundaryHelper ( requiredHelpers , checkOverflow ) ;
3286
+ }
3287
+ else if ( node . IsKnownSucceededByWordChar ( ) )
3288
+ {
3289
+ call = $ "{ negation } { HelpersTypeName } .{ IsPreWordCharBoundary } ";
3290
+ AddIsPreWordCharBoundaryHelper ( requiredHelpers , checkOverflow ) ;
3291
+ }
3292
+ else
3293
+ {
3294
+ call = $ "{ negation } { HelpersTypeName } .{ IsBoundary } ";
3295
+ AddIsBoundaryHelper ( requiredHelpers , checkOverflow ) ;
3296
+ }
3297
+ break ;
3298
+
3299
+ default :
3300
+ call = $ "{ negation } { HelpersTypeName } .{ IsECMABoundary } ";
3301
+ AddIsECMABoundaryHelper ( requiredHelpers , checkOverflow ) ;
3302
+ break ;
3194
3303
}
3195
3304
3196
3305
using ( EmitBlock ( writer , $ "if ({ call } (inputSpan, pos{ ( sliceStaticPos > 0 ? $ " + { sliceStaticPos } " : "" ) } ))") )
0 commit comments