From 49bcd8baa7bff7b2b043711a5dbec6c5c059c8d0 Mon Sep 17 00:00:00 2001 From: Shi-Sheng Yang Date: Sun, 13 Jul 2025 14:16:22 +0800 Subject: [PATCH 1/2] Fix parsing to support uppercase 0X hex literals Previously, literals with uppercase 0X prefix (e.g., "0XABC") were misparsed as invalid. This commit fixes that and improves compatibility with C99-style numeric constants. - Removed incorrect inclusion of 'x' as a hex digit in is_hex() - Updated is_numeric() to skip 0x/0X prefix when validating hex digits - Adjusted read_numeric_constant() to consistently handle both 0x and 0X prefixes --- src/lexer.c | 43 ++++++++++++++++++++++++++++++++++++------- src/parser.c | 4 ++-- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/lexer.c b/src/lexer.c index e2854f34..7f3c2b24 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -42,8 +42,8 @@ bool is_digit(char c) bool is_hex(char c) { - return ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || c == 'x' || - (c >= 'A' && c <= 'F')); + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || + (c >= 'A' && c <= 'F'); } bool is_numeric(char buffer[]) @@ -51,10 +51,10 @@ bool is_numeric(char buffer[]) bool hex = false; int size = strlen(buffer); - if (size > 2) - hex = !strncmp(buffer, "0x", 2); + if (size > 2 && buffer[0] == '0' && (buffer[1] | 32) == 'x') + hex = true; - for (int i = 0; i < size; i++) { + for (int i = hex ? 
2 : 0; i < size; i++) { if (hex && !is_hex(buffer[i])) return false; if (!hex && !is_digit(buffer[i])) @@ -177,9 +177,38 @@ token_t lex_token_internal(bool aliasing) if (is_digit(next_char)) { int i = 0; - do { + token_str[i++] = next_char; + read_char(false); + + if (token_str[0] == '0' && ((next_char | 32) == 'x')) { + /* Hexadecimal: starts with 0x or 0X */ token_str[i++] = next_char; - } while (is_hex(read_char(false))); + + read_char(false); + if (!is_hex(next_char)) + error("Invalid hex literal: expected hex digit after 0x"); + + do { + token_str[i++] = next_char; + } while (is_hex(read_char(false))); + + } else if (token_str[0] == '0') { + /* Octal: starts with 0 but not followed by 'x' or 'X' */ + while (is_digit(next_char)) { + if (next_char >= '8') + error("Invalid octal digit: must be in range 0-7"); + token_str[i++] = next_char; + read_char(false); + } + + } else { + /* Decimal */ + while (is_digit(next_char)) { + token_str[i++] = next_char; + read_char(false); + } + } + token_str[i] = 0; skip_whitespace(); return T_numeric; diff --git a/src/parser.c b/src/parser.c index afa9b2ed..be032d18 100644 --- a/src/parser.c +++ b/src/parser.c @@ -295,7 +295,7 @@ int read_numeric_constant(char buffer[]) int i = 0; int value = 0; while (buffer[i]) { - if (i == 1 && (buffer[i] == 'x')) { /* hexadecimal */ + if (i == 1 && (buffer[i] | 32) == 'x') { /* hexadecimal */ value = 0; i = 2; while (buffer[i]) { @@ -784,7 +784,7 @@ void read_numeric_param(block_t *parent, basic_block_t *bb, int is_neg) i++; } if (token[0] == '0') { - if (token[1] == 'x') { /* hexdecimal */ + if ((token[1] | 32) == 'x') { /* hexadecimal */ i = 2; do { c = token[i++]; From ba2c2dc94b4158b555a0a9961ddef3a5d349ad85 Mon Sep 17 00:00:00 2001 From: Shi-Sheng Yang Date: Mon, 14 Jul 2025 14:54:23 +0800 Subject: [PATCH 2/2] Add test cases for 0X-style hex literals Add tests to verify valid and invalid forms of hex literals. 
--- tests/driver.sh | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/driver.sh b/tests/driver.sh index b32eee4d..fc1bbfed 100755 --- a/tests/driver.sh +++ b/tests/driver.sh @@ -1855,4 +1855,32 @@ int main(void) } EOF +try_output 0 "2748 6719 105884 0" << EOF +int main() +{ + int a = 0XABC; + int b = 0X1a3f; + int c = 0XDEaD + 0xBeEF; + int d = 0X0; + printf("%d %d %d %d", a, b, c, d); + return 0; +} +EOF + +try_compile_error << EOF +int main() +{ + int x = 0X; + return 0; +} +EOF + +try_compile_error << EOF +int main() +{ + int x = 0XGHI; + return 0; +} +EOF + echo OK