Skip to content

Commit 6a97bd7

Browse files
authored
Merge pull request #262 from sysprog21/keyword-recognition
Add fast-path inline keyword recognition
2 parents 69dde47 + c0b73cb commit 6a97bd7

File tree

6 files changed

+91
-5
lines changed

6 files changed

+91
-5
lines changed

lib/c.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,19 @@ char *memcpy(char *dest, char *src, int count)
143143
return dest;
144144
}
145145

146+
/* Compare the first n bytes of the objects pointed to by s1 and s2.
 *
 * @s1: first buffer
 * @s2: second buffer
 * @n:  number of bytes to compare; nothing is read when n <= 0
 *
 * Returns -1 if the first differing byte of s1 is smaller than the
 * corresponding byte of s2, 1 if it is larger, and 0 if all n bytes match.
 *
 * Each byte is compared by its unsigned value (0..255), which is the ordering
 * ISO C specifies for memcmp. Comparing through plain 'char' would invert the
 * result for bytes >= 0x80 wherever 'char' is signed.
 */
int memcmp(void *s1, void *s2, int n)
{
    char *p1 = (char *) s1, *p2 = (char *) s2;

    for (int i = 0; i < n; i++) {
        /* Mask to the low 8 bits so the comparison does not depend on the
         * signedness of 'char'.
         */
        int c1 = p1[i] & 0xFF;
        int c2 = p2[i] & 0xFF;

        if (c1 < c2)
            return -1;
        if (c1 > c2)
            return 1;
    }
    return 0;
}
158+
146159
void *memset(void *s, int c, int n)
147160
{
148161
int i = 0;

src/lexer.c

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -902,7 +902,80 @@ token_t lex_token_internal(bool aliasing)
902902
token_str[i] = 0;
903903
skip_whitespace();
904904

905-
token_t keyword = lookup_keyword(token_str);
905+
/* Fast path for common keywords - avoid hashmap lookup */
906+
token_t keyword = T_identifier;
907+
int token_len = i; /* Length of the token string */
908+
909+
/* Check most common keywords inline based on token length and first
910+
* character.
911+
*/
912+
switch (token_len) {
913+
case 2: /* 2-letter keywords: if, do */
914+
if (token_str[0] == 'i' && token_str[1] == 'f')
915+
keyword = T_if;
916+
else if (token_str[0] == 'd' && token_str[1] == 'o')
917+
keyword = T_do;
918+
break;
919+
920+
case 3: /* 3-letter keywords: for */
921+
if (token_str[0] == 'f' && token_str[1] == 'o' &&
922+
token_str[2] == 'r')
923+
keyword = T_for;
924+
break;
925+
926+
case 4: /* 4-letter keywords: else, enum, case */
927+
if (token_str[0] == 'e') {
928+
if (!memcmp(token_str, "else", 4))
929+
keyword = T_else;
930+
else if (!memcmp(token_str, "enum", 4))
931+
keyword = T_enum;
932+
} else if (!memcmp(token_str, "case", 4))
933+
keyword = T_case;
934+
break;
935+
936+
case 5: /* 5-letter keywords: while, break, union */
937+
if (token_str[0] == 'w' && !memcmp(token_str, "while", 5))
938+
keyword = T_while;
939+
else if (token_str[0] == 'b' && !memcmp(token_str, "break", 5))
940+
keyword = T_break;
941+
else if (token_str[0] == 'u' && !memcmp(token_str, "union", 5))
942+
keyword = T_union;
943+
break;
944+
945+
case 6: /* 6-letter keywords: return, struct, switch, sizeof */
946+
if (token_str[0] == 'r' && !memcmp(token_str, "return", 6))
947+
keyword = T_return;
948+
else if (token_str[0] == 's') {
949+
if (!memcmp(token_str, "struct", 6))
950+
keyword = T_struct;
951+
else if (!memcmp(token_str, "switch", 6))
952+
keyword = T_switch;
953+
else if (!memcmp(token_str, "sizeof", 6))
954+
keyword = T_sizeof;
955+
}
956+
break;
957+
958+
case 7: /* 7-letter keywords: typedef, default */
959+
if (!memcmp(token_str, "typedef", 7))
960+
keyword = T_typedef;
961+
else if (!memcmp(token_str, "default", 7))
962+
keyword = T_default;
963+
break;
964+
965+
case 8: /* 8-letter keywords: continue */
966+
if (!memcmp(token_str, "continue", 8))
967+
keyword = T_continue;
968+
break;
969+
970+
default:
971+
/* Any other token length (shorter than 2 or longer than 8) has no fast-path keyword - use hashmap */
972+
break;
973+
}
974+
975+
/* Fall back to hashmap for uncommon keywords */
976+
if (keyword == T_identifier)
977+
keyword = lookup_keyword(token_str);
978+
906979
if (keyword != T_identifier)
907980
return keyword;
908981

tests/snapshots/fib-arm.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

tests/snapshots/fib-riscv.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

tests/snapshots/hello-arm.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

tests/snapshots/hello-riscv.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)