@@ -243,9 +243,9 @@ void Lexer::scan_preprocessor()
243243 reportError (" expected newline" );
244244}
245245
246- void Lexer::scan_char_constant ( )
246+ void Lexer::scan_char_constant_with_prefix ( const unsigned char * prefix )
247247{
248- const unsigned char *begin = cursor;
248+ const unsigned char *begin = prefix ? prefix : cursor;
249249
250250 ++cursor;
251251 while (*cursor && *cursor != ' \' ' )
@@ -269,9 +269,9 @@ void Lexer::scan_char_constant()
269269 token_stream[(int ) index++].kind = Token_char_literal;
270270}
271271
272- void Lexer::scan_string_constant ( )
272+ void Lexer::scan_string_constant_with_prefix ( const unsigned char * prefix )
273273{
274- const unsigned char *begin = cursor;
274+ const unsigned char *begin = prefix ? prefix : cursor;
275275
276276 ++cursor;
277277 while (*cursor && *cursor != ' "' )
@@ -295,6 +295,71 @@ void Lexer::scan_string_constant()
295295 token_stream[(int ) index++].kind = Token_string_literal;
296296}
297297
298+ void Lexer::scan_raw_string_constant_with_prefix (const unsigned char * prefix)
299+ {
300+ // always starts with "
301+ const unsigned char * begin = prefix ? prefix : cursor;
302+ int delimiterLength = 0 ;
303+ int endSequenceLength = 0 ;
304+ bool stillValidDelimiter = true ;
305+ ++cursor;
306+ while (*cursor)
307+ {
308+ if (!delimiterLength)
309+ {
310+ if (*cursor == ' "' )
311+ {
312+ break ;
313+ }
314+ else if (*cursor == ' \n ' )
315+ {
316+ // this would probably not be a valid delimiter
317+ stillValidDelimiter = false ;
318+ }
319+ else if (stillValidDelimiter && *cursor == ' (' && (cursor - begin) < 16 )
320+ {
321+ // delimiter sequence identified (see https://en.cppreference.com/w/cpp/language/string_literal)
322+ delimiterLength = cursor - begin;
323+ }
324+ }
325+ else if (endSequenceLength)
326+ {
327+ // possible end delimiter sequence
328+ if (endSequenceLength == delimiterLength && *cursor == ' "' )
329+ {
330+ break ;
331+ }
332+ else if (endSequenceLength < delimiterLength && *cursor == begin[endSequenceLength])
333+ {
334+ endSequenceLength++;
335+ }
336+ else
337+ {
338+ // this is not the end of the string, go back to
339+ // after the starting ')' and try again
340+ cursor -= endSequenceLength;
341+ endSequenceLength = 0 ;
342+ }
343+ }
344+ else if (*cursor == ' )' )
345+ {
346+ // this might be the start of the end delimiter sequence
347+ endSequenceLength = 1 ;
348+ }
349+ ++cursor;
350+ }
351+
352+ if (*cursor != ' "' )
353+ reportError (" expected \" " );
354+
355+ ++cursor;
356+
357+ token_stream[(int )index].extra .symbol =
358+ control->findOrInsertName ((const char *)begin, cursor - begin);
359+
360+ token_stream[(int )index++].kind = Token_string_literal;
361+ }
362+
298363void Lexer::scan_newline ()
299364{
300365 if (location_table.current_line == location_table.size ())
@@ -338,20 +403,44 @@ void Lexer::scan_identifier_or_literal()
338403void Lexer::scan_identifier_or_keyword ()
339404{
340405 const unsigned char *skip = cursor;
406+ const unsigned char * start = cursor;
341407 while (isalnum (*skip) || *skip== ' _' )
342408 ++skip;
343409
344410 int n = skip - cursor;
345- Token *current_token = &token_stream[(int ) index];
346- (this ->*s_scan_keyword_table[n < 17 ? n : 0 ])();
411+ if (*skip == ' "' && n <= 3 )
412+ {
413+ cursor = skip;
414+ // this should be a unicode and/or raw string -
415+ // we pass through anything that does not follow the standard, though
416+ if (skip[-1 ] == ' R' )
417+ {
418+ scan_raw_string_constant_with_prefix (start);
419+ }
420+ else
421+ {
422+ scan_string_constant_with_prefix (start);
423+ }
424+ }
425+ else if (*skip == ' \' ' && n <= 2 )
426+ {
427+ // probably some special encoding
428+ cursor = skip;
429+ scan_char_constant_with_prefix (start);
430+ }
431+ else
432+ {
433+ Token* current_token = &token_stream[(int )index];
434+ (this ->*s_scan_keyword_table[n < 17 ? n : 0 ])();
347435
348- if (current_token->kind == Token_identifier)
436+ if (current_token->kind == Token_identifier)
349437 {
350438 current_token->extra .symbol =
351- control->findOrInsertName ((const char *) cursor, n);
439+ control->findOrInsertName ((const char *)cursor, n);
352440 }
353441
354- cursor = skip;
442+ cursor = skip;
443+ }
355444}
356445
357446void Lexer::scan_int_constant ()
0 commit comments