|
12 | 12 |
|
13 | 13 | /* Hash table constants */
|
14 | 14 | #define NUM_DIRECTIVES 11
|
15 |
| -#define NUM_KEYWORDS 15 |
| 15 | +#define NUM_KEYWORDS 16 |
16 | 16 |
|
17 | 17 | /* Preprocessor directive hash table using existing shecc hashmap */
|
18 | 18 | hashmap_t *DIRECTIVE_MAP = NULL;
|
@@ -112,6 +112,8 @@ void lex_init_keywords()
|
112 | 112 | token_values[13] = T_default;
|
113 | 113 | names[14] = "continue";
|
114 | 114 | token_values[14] = T_continue;
|
| 115 | + names[15] = "union"; |
| 116 | + token_values[15] = T_union; |
115 | 117 |
|
116 | 118 | /* hashmap insertion */
|
117 | 119 | for (int i = 0; i < NUM_KEYWORDS; i++) {
|
@@ -203,6 +205,17 @@ bool is_hex(char c)
|
203 | 205 | (c >= 'A' && c <= 'F');
|
204 | 206 | }
|
205 | 207 |
|
/*
 * hex_digit_value - map one hexadecimal digit character to its numeric value.
 * @c: character to convert
 *
 * Accepts '0'-'9' (yielding 0-9) and both 'a'-'f' and 'A'-'F' (yielding
 * 10-15).
 *
 * Return: the digit's value in the range 0-15, or -1 when @c is not a
 * hexadecimal digit.
 */
| 208 | +int hex_digit_value(char c) |
| 209 | +{ |
| 210 | + if (c >= '0' && c <= '9') |
| 211 | + return c - '0'; |
| 212 | + if (c >= 'a' && c <= 'f') |
| 213 | + return c - 'a' + 10; |
| 214 | + if (c >= 'A' && c <= 'F') |
| 215 | + return c - 'A' + 10; |
| 216 | + return -1; |
| 217 | +} |
| 218 | + |
206 | 219 | bool is_numeric(char buffer[])
|
207 | 220 | {
|
208 | 221 | bool hex = false;
|
@@ -330,8 +343,21 @@ token_t lex_token_internal(bool aliasing)
|
330 | 343 | token_str[i++] = next_char;
|
331 | 344 | } while (is_hex(read_char(false)));
|
332 | 345 |
|
| 346 | + } else if (token_str[0] == '0' && ((next_char | 32) == 'b')) { |
| 347 | + /* Binary: starts with 0b or 0B */ |
| 348 | + token_str[i++] = next_char; |
| 349 | + |
| 350 | + read_char(false); |
| 351 | + if (next_char != '0' && next_char != '1') |
| 352 | + error("Invalid binary literal: expected 0 or 1 after 0b"); |
| 353 | + |
| 354 | + do { |
| 355 | + token_str[i++] = next_char; |
| 356 | + read_char(false); |
| 357 | + } while (next_char == '0' || next_char == '1'); |
| 358 | + |
333 | 359 | } else if (token_str[0] == '0') {
|
334 |
| - /* Octal: starts with 0 but not followed by 'x' */ |
| 360 | + /* Octal: starts with 0 but not followed by 'x' or 'b' */ |
335 | 361 | while (is_digit(next_char)) {
|
336 | 362 | if (next_char >= '8')
|
337 | 363 | error("Invalid octal digit: must be in range 0-7");
|
@@ -413,8 +439,58 @@ token_t lex_token_internal(bool aliasing)
|
413 | 439 | token_str[i - 1] = '\\';
|
414 | 440 | else if (next_char == '0')
|
415 | 441 | token_str[i - 1] = '\0';
|
416 |
| - else |
417 |
| - abort(); |
| 442 | + else if (next_char == 'a') |
| 443 | + token_str[i - 1] = '\a'; |
| 444 | + else if (next_char == 'b') |
| 445 | + token_str[i - 1] = '\b'; |
| 446 | + else if (next_char == 'v') |
| 447 | + token_str[i - 1] = '\v'; |
| 448 | + else if (next_char == 'f') |
| 449 | + token_str[i - 1] = '\f'; |
| 450 | + else if (next_char == 'e') /* GNU extension: ESC character */ |
| 451 | + token_str[i - 1] = 27; |
| 452 | + else if (next_char == '?') |
| 453 | + token_str[i - 1] = '?'; |
| 454 | + else if (next_char == 'x') { |
| 455 | + /* Hexadecimal escape sequence \xHH */ |
| 456 | + read_char(false); |
| 457 | + if (!is_hex(next_char)) |
| 458 | + error("Invalid hex escape sequence"); |
| 459 | + int value = 0; |
| 460 | + int count = 0; |
| 461 | + while (is_hex(next_char) && count < 2) { |
| 462 | + value = (value << 4) + hex_digit_value(next_char); |
| 463 | + read_char(false); |
| 464 | + count++; |
| 465 | + } |
| 466 | + token_str[i - 1] = value; |
| 467 | + /* Back up one character as we read one too many */ |
| 468 | + SOURCE->size--; |
| 469 | + next_char = SOURCE->elements[SOURCE->size]; |
| 470 | + } else if (next_char >= '0' && next_char <= '7') { |
| 471 | + /* Octal escape sequence \nnn */ |
| 472 | + int value = next_char - '0'; |
| 473 | + read_char(false); |
| 474 | + if (next_char >= '0' && next_char <= '7') { |
| 475 | + value = (value << 3) + (next_char - '0'); |
| 476 | + read_char(false); |
| 477 | + if (next_char >= '0' && next_char <= '7') { |
| 478 | + value = (value << 3) + (next_char - '0'); |
| 479 | + } else { |
| 480 | + /* Back up one character */ |
| 481 | + SOURCE->size--; |
| 482 | + next_char = SOURCE->elements[SOURCE->size]; |
| 483 | + } |
| 484 | + } else { |
| 485 | + /* Back up one character */ |
| 486 | + SOURCE->size--; |
| 487 | + next_char = SOURCE->elements[SOURCE->size]; |
| 488 | + } |
| 489 | + token_str[i - 1] = value; |
| 490 | + } else { |
| 491 | + /* Handle unknown escapes gracefully */ |
| 492 | + token_str[i - 1] = next_char; |
| 493 | + } |
418 | 494 | } else {
|
419 | 495 | token_str[i++] = next_char;
|
420 | 496 | }
|
@@ -445,8 +521,58 @@ token_t lex_token_internal(bool aliasing)
|
445 | 521 | token_str[0] = '\\';
|
446 | 522 | else if (next_char == '0')
|
447 | 523 | token_str[0] = '\0';
|
448 |
| - else |
449 |
| - abort(); |
| 524 | + else if (next_char == 'a') |
| 525 | + token_str[0] = '\a'; |
| 526 | + else if (next_char == 'b') |
| 527 | + token_str[0] = '\b'; |
| 528 | + else if (next_char == 'v') |
| 529 | + token_str[0] = '\v'; |
| 530 | + else if (next_char == 'f') |
| 531 | + token_str[0] = '\f'; |
| 532 | + else if (next_char == 'e') /* GNU extension: ESC character */ |
| 533 | + token_str[0] = 27; |
| 534 | + else if (next_char == '?') |
| 535 | + token_str[0] = '?'; |
| 536 | + else if (next_char == 'x') { |
| 537 | + /* Hexadecimal escape sequence \xHH */ |
| 538 | + read_char(false); |
| 539 | + if (!is_hex(next_char)) |
| 540 | + error("Invalid hex escape sequence"); |
| 541 | + int value = 0; |
| 542 | + int count = 0; |
| 543 | + while (is_hex(next_char) && count < 2) { |
| 544 | + value = (value << 4) + hex_digit_value(next_char); |
| 545 | + read_char(false); |
| 546 | + count++; |
| 547 | + } |
| 548 | + token_str[0] = value; |
| 549 | + /* Back up one character as we read one too many */ |
| 550 | + SOURCE->size--; |
| 551 | + next_char = SOURCE->elements[SOURCE->size]; |
| 552 | + } else if (next_char >= '0' && next_char <= '7') { |
| 553 | + /* Octal escape sequence \nnn */ |
| 554 | + int value = next_char - '0'; |
| 555 | + read_char(false); |
| 556 | + if (next_char >= '0' && next_char <= '7') { |
| 557 | + value = (value << 3) + (next_char - '0'); |
| 558 | + read_char(false); |
| 559 | + if (next_char >= '0' && next_char <= '7') { |
| 560 | + value = (value << 3) + (next_char - '0'); |
| 561 | + } else { |
| 562 | + /* Back up one character */ |
| 563 | + SOURCE->size--; |
| 564 | + next_char = SOURCE->elements[SOURCE->size]; |
| 565 | + } |
| 566 | + } else { |
| 567 | + /* Back up one character */ |
| 568 | + SOURCE->size--; |
| 569 | + next_char = SOURCE->elements[SOURCE->size]; |
| 570 | + } |
| 571 | + token_str[0] = value; |
| 572 | + } else { |
| 573 | + /* Handle unknown escapes gracefully */ |
| 574 | + token_str[0] = next_char; |
| 575 | + } |
450 | 576 | } else {
|
451 | 577 | token_str[0] = next_char;
|
452 | 578 | }
|
|
0 commit comments