2525import static com .sonar .sslr .impl .channel .RegexpChannelBuilder .ANY_CHAR ;
2626import static com .sonar .sslr .impl .channel .RegexpChannelBuilder .and ;
2727import static com .sonar .sslr .impl .channel .RegexpChannelBuilder .commentRegexp ;
28+ import static com .sonar .sslr .impl .channel .RegexpChannelBuilder .g ;
2829import static com .sonar .sslr .impl .channel .RegexpChannelBuilder .o2n ;
2930import static com .sonar .sslr .impl .channel .RegexpChannelBuilder .opt ;
31+ import static com .sonar .sslr .impl .channel .RegexpChannelBuilder .or ;
3032import static com .sonar .sslr .impl .channel .RegexpChannelBuilder .regexp ;
3133import com .sonar .sslr .impl .channel .UnknownCharacterChannel ;
3234import org .sonar .cxx .CxxConfiguration ;
3941public final class CppLexer {
4042
// Regular-expression fragments that create() combines into the patterns for CxxTokenType.NUMBER.
// In this diff the '-' lines are the previous definitions (each wrapped in an outer group) and the
// '+' lines are their replacements without that wrapper; the call site applies g(...) where needed.
// The quantifiers "?+", "*+" and "++" are possessive, i.e. they never give back matched input.
// Prefix of a hexadecimal literal: "0x" or "0X".
4143 private static final String HEX_PREFIX = "0[xX]" ;
42- private static final String EXPONENT = "([eE][+-]?+[0-9_]([']?+[0-9_]++)*+)" ;
43- private static final String BINARY_EXPONENT = "([pP][+-]?+[0-9]([']?+[0-9]++)*+)" ; // since C++17
// Prefix of a binary literal: "0b" or "0B".
44+ private static final String BIN_PREFIX = "0[bB]" ;
// Decimal exponent: 'e' or 'E', an optional sign, then digits whose groups may be separated by a single apostrophe.
// NOTE(review): the digit class here also admits '_', unlike the other digit sequences below - confirm this is intended.
45+ private static final String EXPONENT = "[eE][+-]?+[0-9_]([']?+[0-9_]++)*+" ;
// Binary exponent: 'p' or 'P', an optional sign, then decimal digits; create() pairs it with HEX_PREFIX.
46+ private static final String BINARY_EXPONENT = "[pP][+-]?+[0-9]([']?+[0-9]++)*+" ; // since C++17
4447 //private static final String INTEGER_SUFFIX = "(((U|u)(LL|ll|L|l)?)|((LL|ll|L|l)(u|U)?))";
4548 //private static final String FLOAT_SUFFIX = "(f|l|F|L)";
4649 // ud-suffix: identifier (including INTEGER_SUFFIX, FLOAT_SUFFIX)
47- private static final String UD_SUFFIX = "([_a-zA-Z]([_a-zA-Z0-9]*+))" ;
48- private static final String HEXDIGIT_SEQUENCE = "([0-9a-fA-F]([']?+[0-9a-fA-F]++)*+)" ;
// Suffix shaped like an identifier: a letter or underscore followed by any number of letters, digits or underscores.
50+ private static final String UD_SUFFIX = "[_a-zA-Z][_a-zA-Z0-9]*+" ;
// Digit sequences: one leading digit, then further digit groups, each optionally preceded by a single apostrophe.
51+ private static final String DECDIGIT_SEQUENCE = "[0-9]([']?+[0-9]++)*+" ;
52+ private static final String HEXDIGIT_SEQUENCE = "[0-9a-fA-F]([']?+[0-9a-fA-F]++)*+" ;
53+ private static final String BINDIGIT_SEQUENCE = "[01]([']?+[01]++)*+" ;
// Presumably meant to match a literal '.' character.
// NOTE(review): the space inside the literal as shown here looks like an extraction artifact - confirm the real source is backslash-backslash-dot.
54+ private static final String POINT = "\\ ." ;
4955
// Private, empty constructor: code outside this final class cannot instantiate it;
// the lexer is obtained through the static create(...) method instead.
5056 private CppLexer () {
5157 }
@@ -67,24 +73,22 @@ public static Lexer create(CxxConfiguration conf) {
6773 .withChannel (commentRegexp ("/\\ *" , ANY_CHAR + "*?" , "\\ */" ))
6874 .withChannel (new CharacterLiteralsChannel ())
6975 .withChannel (new StringLiteralsChannel ())
70- // C++ Standard, Section 2.14.4 "Floating literals"
71- .withChannel (regexp (CxxTokenType .NUMBER , "[0-9]([']?+[0-9]++)*+\\ .([0-9]([']?+[0-9]++)*+)*+"
72- + opt (EXPONENT ) + opt (UD_SUFFIX )))
73- .withChannel (regexp (CxxTokenType .NUMBER , "\\ .[0-9]([']?+[0-9]++)*+"
74- + opt (EXPONENT ) + opt (UD_SUFFIX )))
75- .withChannel (regexp (CxxTokenType .NUMBER , "[0-9]([']?+[0-9]++)*+" + EXPONENT + opt (UD_SUFFIX )))
76- .withChannel (regexp (CxxTokenType .NUMBER , HEX_PREFIX + HEXDIGIT_SEQUENCE
77- + BINARY_EXPONENT + opt (UD_SUFFIX ))) // since C++17
78- .withChannel (regexp (CxxTokenType .NUMBER , HEX_PREFIX + HEXDIGIT_SEQUENCE + "."
79- + BINARY_EXPONENT + opt (UD_SUFFIX ))) // since C++17
80- .withChannel (regexp (CxxTokenType .NUMBER , HEX_PREFIX + opt (HEXDIGIT_SEQUENCE ) + "." + HEXDIGIT_SEQUENCE
81- + BINARY_EXPONENT + opt (UD_SUFFIX ))) // since C++17
76+
8277 // C++ Standard, Section 2.14.2 "Integer literals"
83- .withChannel (regexp (CxxTokenType .NUMBER , "[1-9]([']?+[0-9]++)*+" + opt (UD_SUFFIX ))) // Decimal literals
84- .withChannel (regexp (CxxTokenType .NUMBER , "0[bB][01]([']?+[01]++)*+" + opt (UD_SUFFIX ))) // Binary Literals
85- .withChannel (regexp (CxxTokenType .NUMBER , "0([']?+[0-7]++)++" + opt (UD_SUFFIX ))) // Octal Literals
86- .withChannel (regexp (CxxTokenType .NUMBER , HEX_PREFIX + HEXDIGIT_SEQUENCE + opt (UD_SUFFIX ))) // Hex Literals
87- .withChannel (regexp (CxxTokenType .NUMBER , "0" + opt (UD_SUFFIX ))) // Decimal zero
78+ // C++ Standard, Section 2.14.4 "Floating literals"
79+ .withChannel (
80+ regexp (CxxTokenType .NUMBER ,
81+ and (
82+ or (
83+ g (POINT , DECDIGIT_SEQUENCE , opt (g (EXPONENT ))),
84+ g (HEX_PREFIX , opt (g (HEXDIGIT_SEQUENCE )), opt (POINT ), opt (g (HEXDIGIT_SEQUENCE )), opt (g (BINARY_EXPONENT ))),
85+ g (BIN_PREFIX , BINDIGIT_SEQUENCE ),
86+ g (DECDIGIT_SEQUENCE , opt (POINT ), opt (g (DECDIGIT_SEQUENCE )), opt (g (EXPONENT )))
87+ ),
88+ opt (g (UD_SUFFIX ))
89+ )
90+ )
91+ )
8892
8993 .withChannel (new KeywordChannel (and ("#" , o2n ("\\ s" ), "[a-z]" , o2n ("\\ w" )), CppKeyword .values ()))
9094 .withChannel (new IdentifierAndKeywordChannel (and ("[a-zA-Z_]" , o2n ("\\ w" )), true ))
0 commit comments