@@ -5632,23 +5632,104 @@ private static string DescribeCapture(int capNum, RegexMethod rm)
5632
5632
}
5633
5633
5634
5634
/// <summary>Produces a human-readable phrase describing which characters a set matches.</summary>
/// <param name="charClass">The character class string to describe.</param>
/// <returns>
/// A fixed phrase when <paramref name="charClass"/> equals one of the recognized <c>RegexCharClass</c> constants;
/// otherwise a phrase for the single Unicode category reported for the set, when that category has one;
/// otherwise a generic phrase that embeds the result of <c>RegexCharClass.DescribeSet</c>.
/// </returns>
private static string DescribeSet(string charClass)
{
    // Try the most specific wording first; each step yields null when it has nothing to offer.
    return
        DescribeWellKnownSet(charClass) ??
        DescribeSingleCategorySet(charClass) ??
        $"a character in the set {RegexCharClass.DescribeSet(charClass)}";

    // Maps the recognized class constants to fixed phrases; null for anything else.
    static string? DescribeWellKnownSet(string set) => set switch
    {
        // Classes described positively.
        RegexCharClass.AnyClass => "any character",
        RegexCharClass.AsciiLetterClass => "an ASCII letter",
        RegexCharClass.AsciiLetterOrDigitClass => "an ASCII letter or digit",
        RegexCharClass.HexDigitClass => "a hexadecimal digit",
        RegexCharClass.HexDigitLowerClass => "a lowercase hexadecimal digit",
        RegexCharClass.HexDigitUpperClass => "an uppercase hexadecimal digit",
        RegexCharClass.LetterClass => "a Unicode letter",
        RegexCharClass.LetterOrDigitClass => "a Unicode letter or digit",
        RegexCharClass.NumberClass => "a Unicode number",
        RegexCharClass.PunctuationClass => "a Unicode punctuation character",
        RegexCharClass.SeparatorClass => "a Unicode separator",
        RegexCharClass.SymbolClass => "a Unicode symbol",
        RegexCharClass.SpaceClass => "a whitespace character",
        RegexCharClass.WordClass => "a word character",
        RegexCharClass.ECMASpaceClass => "a whitespace character (ECMA)",
        RegexCharClass.ECMAWordClass => "a word character (ECMA)",

        // Classes described as "anything but".
        RegexCharClass.NotAsciiLetterClass => "any character other than an ASCII letter",
        RegexCharClass.NotAsciiLetterOrDigitClass => "any character other than an ASCII letter or digit",
        RegexCharClass.NotHexDigitClass => "any character other than a hexadecimal digit",
        RegexCharClass.NotHexDigitLowerClass => "any character other than a lowercase hexadecimal digit",
        RegexCharClass.NotHexDigitUpperClass => "any character other than an uppercase hexadecimal digit",
        RegexCharClass.NotControlClass => "any character other than a Unicode control character",
        RegexCharClass.NotDigitClass => "any character other than a Unicode digit",
        RegexCharClass.NotLetterClass => "any character other than a Unicode letter",
        RegexCharClass.NotLetterOrDigitClass => "any character other than a Unicode letter or digit",
        RegexCharClass.NotLowerClass => "any character other than a Unicode lowercase letter",
        RegexCharClass.NotUpperClass => "any character other than a Unicode uppercase letter",
        RegexCharClass.NotNumberClass => "any character other than a Unicode number",
        RegexCharClass.NotPunctuationClass => "any character other than a Unicode punctuation character",
        RegexCharClass.NotSeparatorClass => "any character other than a Unicode separator",
        RegexCharClass.NotSymbolClass => "any character other than a Unicode symbol",
        RegexCharClass.NotSpaceClass => "any character other than a whitespace character",
        RegexCharClass.NotWordClass => "any character other than a word character",
        RegexCharClass.NotECMASpaceClass => "any character other than a whitespace character (ECMA)",
        RegexCharClass.NotECMAWordClass => "any character other than a word character (ECMA)",

        _ => null,
    };

    // Describes a set for which TryGetOnlyCategories succeeds with exactly one category; null otherwise.
    // NOTE(review): presumably that means the set is made up solely of that Unicode category (optionally
    // negated) -- confirm against RegexCharClass.TryGetOnlyCategories.
    static string? DescribeSingleCategorySet(string set)
    {
        // Room for a single category only: sets reporting any other count are not described here.
        Span<UnicodeCategory> singleCategory = stackalloc UnicodeCategory[1];
        if (!RegexCharClass.TryGetOnlyCategories(set, singleCategory, out int categoryCount, out bool isNegated) ||
            categoryCount != 1)
        {
            return null;
        }

        // OtherLetter, OtherNumber, OtherPunctuation, and OtherSymbol deliberately have no phrase; they
        // (and any unrecognized value) hit the default arm so the caller uses the generic description.
        string? phrase = singleCategory[0] switch
        {
            UnicodeCategory.UppercaseLetter => "a Unicode uppercase letter",
            UnicodeCategory.LowercaseLetter => "a Unicode lowercase letter",
            UnicodeCategory.TitlecaseLetter => "a Unicode titlecase letter",
            UnicodeCategory.ModifierLetter => "a Unicode modifier letter",
            UnicodeCategory.NonSpacingMark => "a Unicode non-spacing mark",
            UnicodeCategory.SpacingCombiningMark => "a Unicode spacing-combining mark",
            UnicodeCategory.EnclosingMark => "a Unicode enclosing mark",
            UnicodeCategory.DecimalDigitNumber => "a Unicode digit",
            UnicodeCategory.LetterNumber => "a Unicode letter number",
            UnicodeCategory.SpaceSeparator => "a Unicode space separator",
            UnicodeCategory.LineSeparator => "a Unicode line separator",
            UnicodeCategory.ParagraphSeparator => "a Unicode paragraph separator",
            UnicodeCategory.Control => "a Unicode control character",
            UnicodeCategory.Format => "a Unicode format character",
            UnicodeCategory.Surrogate => "a Unicode surrogate character",
            UnicodeCategory.PrivateUse => "a Unicode private-use character",
            UnicodeCategory.ConnectorPunctuation => "a Unicode connector punctuation character",
            UnicodeCategory.DashPunctuation => "a Unicode dash punctuation character",
            UnicodeCategory.OpenPunctuation => "a Unicode open punctuation character",
            UnicodeCategory.ClosePunctuation => "a Unicode close punctuation character",
            UnicodeCategory.InitialQuotePunctuation => "a Unicode initial quote punctuation character",
            UnicodeCategory.FinalQuotePunctuation => "a Unicode final quote punctuation character",
            UnicodeCategory.MathSymbol => "a Unicode math symbol",
            UnicodeCategory.CurrencySymbol => "a Unicode currency symbol",
            UnicodeCategory.ModifierSymbol => "a Unicode modifier symbol",
            UnicodeCategory.OtherNotAssigned => "an unassigned Unicode code point",
            _ => null,
        };

        if (phrase is null)
        {
            return null;
        }

        if (isNegated)
        {
            return $"any character other than {phrase}";
        }

        return phrase;
    }
}
5732
+
5652
5733
/// <summary>Writes a textual description of the node tree fit for rendering in source.</summary>
5653
5734
/// <param name="writer">The writer to which the description should be written.</param>
5654
5735
/// <param name="node">The node being written.</param>
0 commit comments