Skip to content

Commit 80c894d

Browse files
make the linker sort the statement keyword list
1 parent fbead1d commit 80c894d

File tree

5 files changed

+142
-205
lines changed

5 files changed

+142
-205
lines changed

.idea/inspectionProfiles/Project_Default.xml

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

buildtools/linker.ld

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ SECTIONS {
4242
KEEP (*(.fini_array))
4343
__FINI_END__ = .;
4444

45+
. = ALIGN(8);
46+
__start_kw_array = .;
47+
KEEP(*(SORT_BY_NAME(.kw.*)))
48+
__stop_kw_array = .;
49+
4550
. = ALIGN(0x1000);
4651
}
4752

include/basic/defines.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,3 +207,6 @@
207207
if (!accept(token, ctx)) { \
208208
return; \
209209
}
210+
211+
/* Drop each keyword into its own section, name = .kw.<keyword>. */
212+
#define DECL_KEYWORD(name) static const int kw_##name __attribute__((section(".kw." #name), used)) = name;

include/basic/tokenizer.h

Lines changed: 125 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,22 @@
3434
* The following #define generates an enum of token types, along with a corresponding array of string
3535
* representations for each token. These are used in basic.c to perform tokenization during program parsing.
3636
*/
37-
#define GENERATE_ENUM(ENUM) ENUM,
38-
#define GENERATE_STRING(STRING) #STRING,
37+
#define GENERATE_ENUM(NAME, ...) NAME,
38+
#define GENERATE_STRING(NAME, ...) #NAME,
3939
#define GENERATE_ENUM_LIST(MACRO, NAME) enum NAME { MACRO(GENERATE_ENUM) };
4040
#define GENERATE_ENUM_STRING_NAMES(MACRO, NAME) const char* NAME [] = { MACRO(GENERATE_STRING) };
4141

42+
/* Flags for the second argument of each TOKEN entry; EMIT_FROM_FLAG pastes the flag onto EMIT_ to pick EMIT_STMT or EMIT_NONSTMT */
43+
#define STMT STMT
44+
#define NONSTMT NONSTMT
45+
46+
/* Conditional emit: only statement keywords produce a const in .kw.<name> */
47+
#define EMIT_STMT(name) \
48+
static const int kw_##name __attribute__((section(".kw." #name), used)) = name;
49+
#define EMIT_NONSTMT(name) /* nothing */
50+
51+
#define EMIT_FROM_FLAG(name, flag) EMIT_##flag(name)
52+
4253
/**
4354
* @brief All tokens recognised by the interpreter. Note that built in function names are NOT
4455
* tokens, they are parsed like user functions, just with a hard coded handler instead of
@@ -48,117 +59,118 @@
4859
* enum, which is built and used within basic.c for tokenization.
4960
*/
5061
#define TOKEN(T) \
51-
T(NO_TOKEN) /* 0 */ \
52-
T(ERROR) /* 1 */ \
53-
T(ENDOFINPUT) /* 2 */ \
54-
T(NUMBER) /* 3 */ \
55-
T(HEXNUMBER) /* 4 */ \
56-
T(STRING) /* 5 */ \
57-
T(VARIABLE) /* 6 */ \
58-
T(LET) /* 7 */ \
59-
T(PRINT) /* 8 */ \
60-
T(IF) /* 9 */ \
61-
T(THEN) /* 10 */ \
62-
T(ELSE) /* 11 */ \
63-
T(CHAIN) /* 12 */ \
64-
T(FOR) /* 13 */ \
65-
T(STEP) /* 14 */ \
66-
T(TO) /* 15 */ \
67-
T(NEXT) /* 16 */ \
68-
T(CURSOR) /* 17 */ \
69-
T(GOTO) /* 18 */ \
70-
T(GOSUB) /* 19 */ \
71-
T(RETURN) /* 20 */ \
72-
T(CALL) /* 21 */ \
73-
T(INPUT) /* 22 */ \
74-
T(COLOUR) /* 23 */ \
75-
T(COLOR) /* 24 */ \
76-
T(BACKGROUND) /* 25 */ \
77-
T(EVAL) /* 26 */ \
78-
T(CLOSE) /* 27 */ \
79-
T(DEF) /* 28 */ \
80-
T(PROC) /* 29 */ \
81-
T(ENDPROC) /* 30 */ \
82-
T(FN) /* 31 */ \
83-
T(END) /* 32 */ \
84-
T(REM) /* 33 */ \
85-
T(COMMA) /* 34 */ \
86-
T(SEMICOLON) /* 35 */ \
87-
T(PLUS) /* 36 */ \
88-
T(MINUS) /* 37 */ \
89-
T(AND) /* 38 */ \
90-
T(OR) /* 39 */ \
91-
T(NOT) /* 40 */ \
92-
T(EOR) /* 41 */ \
93-
T(ASTERISK) /* 42 */ \
94-
T(SLASH) /* 43 */ \
95-
T(MOD) /* 44 */ \
96-
T(OPENBRACKET) /* 45 */ \
97-
T(CLOSEBRACKET) /* 46 */ \
98-
T(LESSTHAN) /* 47 */ \
99-
T(GREATERTHAN) /* 48 */ \
100-
T(EQUALS) /* 49 */ \
101-
T(NEWLINE) /* 50 */ \
102-
T(AMPERSAND) /* 51 */ \
103-
T(TILDE) /* 52 */ \
104-
T(GLOBAL) /* 53 */ \
105-
T(SOCKREAD) /* 54 */ \
106-
T(SOCKWRITE) /* 55 */ \
107-
T(CONNECT) /* 56 */ \
108-
T(SOCKCLOSE) /* 57 */ \
109-
T(CLS) /* 58 */ \
110-
T(GCOL) /* 59 */ \
111-
T(LINE) /* 60 */ \
112-
T(TRIANGLE) /* 61 */ \
113-
T(RECTANGLE) /* 62 */ \
114-
T(CIRCLE) /* 63 */ \
115-
T(POINT) /* 64 */ \
116-
T(DATA) /* 65 */ \
117-
T(RESTORE) /* 66 */ \
118-
T(WRITE) /* 67 */ \
119-
T(MKDIR) /* 68 */ \
120-
T(RMDIR) /* 69 */ \
121-
T(DELETE) /* 70 */ \
122-
T(REPEAT) /* 71 */ \
123-
T(UNTIL) /* 72 */ \
124-
T(DIM) /* 73 */ \
125-
T(REDIM) /* 74 */ \
126-
T(PUSH) /* 75 */ \
127-
T(POKE) /* 76 */ \
128-
T(POKEW) /* 77 */ \
129-
T(POKED) /* 78 */ \
130-
T(POKEQ) /* 79 */ \
131-
T(POP) /* 80 */ \
132-
T(LOCAL) /* 81 */ \
133-
T(CHDIR) /* 82 */ \
134-
T(LIBRARY) /* 83 */ \
135-
T(YIELD) /* 84 */ \
136-
T(SETVARI) /* 85 */ \
137-
T(SETVARR) /* 86 */ \
138-
T(SETVARS) /* 87 */ \
139-
T(SPRITELOAD) /* 88 */ \
140-
T(SPRITEFREE) /* 89 */ \
141-
T(PLOT) /* 90 */ \
142-
T(AUTOFLIP) /* 91 */ \
143-
T(FLIP) /* 92 */ \
144-
T(KEYMAP) /* 93 */ \
145-
T(MOUNT) /* 94 */ \
146-
T(SETTIMEZONE) /* 95 */ \
147-
T(ENDIF) /* 96 */ \
148-
T(PLOTQUAD) /* 97 */ \
149-
T(ON) /* 98 */ \
150-
T(OFF) /* 99 */ \
151-
T(WHILE) /* 100 */ \
152-
T(ENDWHILE) /* 101 */ \
153-
T(SLEEP) /* 102 */ \
154-
T(CONTINUE) /* 103 */ \
155-
T(UDPBIND) /* 104 */ \
156-
T(UDPUNBIND) /* 105 */ \
157-
T(UDPWRITE) /* 106 */ \
158-
T(OUTPORT) /* 107 */ \
159-
T(OUTPORTW) /* 108 */ \
160-
T(OUTPORTD) /* 109 */ \
161-
T(KGET) /* 110 */ \
62+
T(NO_TOKEN, NONSTMT) /* 0 */ \
63+
T(ERROR, STMT) /* 1 */ \
64+
T(ENDOFINPUT, NONSTMT) /* 2 */ \
65+
T(NUMBER, NONSTMT) /* 3 */ \
66+
T(HEXNUMBER, NONSTMT) /* 4 */ \
67+
T(STRING, NONSTMT) /* 5 */ \
68+
T(VARIABLE, NONSTMT) /* 6 */ \
69+
T(LET, STMT) /* 7 */ \
70+
T(PRINT, STMT) /* 8 */ \
71+
T(IF, STMT) /* 9 */ \
72+
T(THEN, STMT) /* 10 */ \
73+
T(ELSE, STMT) /* 11 */ \
74+
T(CHAIN, STMT) /* 12 */ \
75+
T(FOR, STMT) /* 13 */ \
76+
T(STEP, STMT) /* 14 */ \
77+
T(TO, STMT) /* 15 */ \
78+
T(NEXT, STMT) /* 16 */ \
79+
T(CURSOR, STMT) /* 17 */ \
80+
T(GOTO, STMT) /* 18 */ \
81+
T(GOSUB, STMT) /* 19 */ \
82+
T(RETURN, STMT) /* 20 */ \
83+
T(CALL, STMT) /* 21 */ \
84+
T(INPUT, STMT) /* 22 */ \
85+
T(COLOUR, STMT) /* 23 */ \
86+
T(COLOR, STMT) /* 24 */ \
87+
T(BACKGROUND, STMT) /* 25 */ \
88+
T(EVAL, STMT) /* 26 */ \
89+
T(CLOSE, STMT) /* 27 */ \
90+
T(DEF, STMT) /* 28 */ \
91+
T(PROC, STMT) /* 29 */ \
92+
T(ENDPROC, STMT) /* 30 */ \
93+
T(FN, NONSTMT) /* 31 */ \
94+
T(END, STMT) /* 32 */ \
95+
T(REM, STMT) /* 33 */ \
96+
T(COMMA, NONSTMT) /* 34 */ \
97+
T(SEMICOLON, NONSTMT) /* 35 */ \
98+
T(PLUS, NONSTMT) /* 36 */ \
99+
T(MINUS, NONSTMT) /* 37 */ \
100+
T(AND, STMT) /* 38 */ \
101+
T(OR, STMT) /* 39 */ \
102+
T(NOT, STMT) /* 40 */ \
103+
T(EOR, STMT) /* 41 */ \
104+
T(ASTERISK, NONSTMT) /* 42 */ \
105+
T(SLASH, NONSTMT) /* 43 */ \
106+
T(MOD, NONSTMT) /* 44 */ \
107+
T(OPENBRACKET, NONSTMT) /* 45 */ \
108+
T(CLOSEBRACKET, NONSTMT) /* 46 */ \
109+
T(LESSTHAN, NONSTMT) /* 47 */ \
110+
T(GREATERTHAN, NONSTMT) /* 48 */ \
111+
T(EQUALS, NONSTMT) /* 49 */ \
112+
T(NEWLINE, NONSTMT) /* 50 */ \
113+
T(AMPERSAND, NONSTMT) /* 51 */ \
114+
T(TILDE, NONSTMT) /* 52 */ \
115+
T(GLOBAL, STMT) /* 53 */ \
116+
T(SOCKREAD, STMT) /* 54 */ \
117+
T(SOCKWRITE, STMT) /* 55 */ \
118+
T(CONNECT, STMT) /* 56 */ \
119+
T(SOCKCLOSE, STMT) /* 57 */ \
120+
T(CLS, STMT) /* 58 */ \
121+
T(GCOL, STMT) /* 59 */ \
122+
T(LINE, STMT) /* 60 */ \
123+
T(TRIANGLE, STMT) /* 61 */ \
124+
T(RECTANGLE, STMT) /* 62 */ \
125+
T(CIRCLE, STMT) /* 63 */ \
126+
T(POINT, STMT) /* 64 */ \
127+
T(DATA, STMT) /* 65 */ \
128+
T(RESTORE, STMT) /* 66 */ \
129+
T(WRITE, STMT) /* 67 */ \
130+
T(MKDIR, STMT) /* 68 */ \
131+
T(RMDIR, STMT) /* 69 */ \
132+
T(DELETE, STMT) /* 70 */ \
133+
T(REPEAT, STMT) /* 71 */ \
134+
T(UNTIL, STMT) /* 72 */ \
135+
T(DIM, STMT) /* 73 */ \
136+
T(REDIM, STMT) /* 74 */ \
137+
T(PUSH, STMT) /* 75 */ \
138+
T(POKE, STMT) /* 76 */ \
139+
T(POKEW, STMT) /* 77 */ \
140+
T(POKED, STMT) /* 78 */ \
141+
T(POKEQ, STMT) /* 79 */ \
142+
T(POP, STMT) /* 80 */ \
143+
T(LOCAL, STMT) /* 81 */ \
144+
T(CHDIR, STMT) /* 82 */ \
145+
T(LIBRARY, STMT) /* 83 */ \
146+
T(YIELD, STMT) /* 84 */ \
147+
T(SETVARI, STMT) /* 85 */ \
148+
T(SETVARR, STMT) /* 86 */ \
149+
T(SETVARS, STMT) /* 87 */ \
150+
T(SPRITELOAD, STMT) /* 88 */ \
151+
T(SPRITEFREE, STMT) /* 89 */ \
152+
T(PLOT, STMT) /* 90 */ \
153+
T(AUTOFLIP, STMT) /* 91 */ \
154+
T(FLIP, STMT) /* 92 */ \
155+
T(KEYMAP, STMT) /* 93 */ \
156+
T(MOUNT, STMT) /* 94 */ \
157+
T(SETTIMEZONE, STMT) /* 95 */ \
158+
T(ENDIF, STMT) /* 96 */ \
159+
T(PLOTQUAD, STMT) /* 97 */ \
160+
T(ON, STMT) /* 98 */ \
161+
T(OFF, STMT) /* 99 */ \
162+
T(WHILE, STMT) /* 100 */ \
163+
T(ENDWHILE, STMT) /* 101 */ \
164+
T(SLEEP, STMT) /* 102 */ \
165+
T(CONTINUE, STMT) /* 103 */ \
166+
T(UDPBIND, STMT) /* 104 */ \
167+
T(UDPUNBIND, STMT) /* 105 */ \
168+
T(UDPWRITE, STMT) /* 106 */ \
169+
T(OUTPORT, STMT) /* 107 */ \
170+
T(OUTPORTW, STMT) /* 108 */ \
171+
T(OUTPORTD, STMT) /* 109 */ \
172+
T(KGET, STMT) /* 110 */ \
173+
162174

163175
GENERATE_ENUM_LIST(TOKEN, token_t)
164176

src/basic/tokenizer.c

Lines changed: 8 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -30,99 +30,15 @@ extern bool debug;
3030
*
3131
* @note It is important that these are alphabetically sorted.
3232
* It allows us to optimise the search.
33+
*
34+
* Each entry is emitted into its own .kw.<NAME> input section; the linker
35+
* script sorts these by name between __start_kw_array and __stop_kw_array.
3336
*/
34-
const int keywords[] = {
35-
AND,
36-
AUTOFLIP,
37-
BACKGROUND,
38-
CALL,
39-
CHAIN,
40-
CHDIR,
41-
CIRCLE,
42-
CLOSE,
43-
CLS,
44-
COLOR,
45-
COLOUR,
46-
CONNECT,
47-
CONTINUE,
48-
CURSOR,
49-
DATA,
50-
DEF,
51-
DELETE,
52-
DIM,
53-
ELSE,
54-
END,
55-
ENDIF,
56-
ENDPROC,
57-
ENDWHILE,
58-
EOR,
59-
ERROR,
60-
EVAL,
61-
FLIP,
62-
FOR,
63-
GCOL,
64-
GLOBAL,
65-
GOSUB,
66-
GOTO,
67-
IF,
68-
INPUT,
69-
KEYMAP,
70-
KGET,
71-
LET,
72-
LIBRARY,
73-
LINE,
74-
LOCAL,
75-
MKDIR,
76-
MOUNT,
77-
NEXT,
78-
NOT,
79-
OFF,
80-
ON,
81-
OR,
82-
OUTPORT,
83-
OUTPORTW,
84-
OUTPORTD,
85-
PLOT,
86-
PLOTQUAD,
87-
POINT,
88-
POKE,
89-
POKED,
90-
POKEQ,
91-
POKEW,
92-
POP,
93-
PRINT,
94-
PROC,
95-
PUSH,
96-
RECTANGLE,
97-
REDIM,
98-
REM,
99-
REPEAT,
100-
RESTORE,
101-
RETURN,
102-
RMDIR,
103-
SETTIMEZONE,
104-
SETVARI,
105-
SETVARR,
106-
SETVARS,
107-
SLEEP,
108-
SOCKCLOSE,
109-
SOCKREAD,
110-
SOCKWRITE,
111-
SPRITEFREE,
112-
SPRITELOAD,
113-
STEP,
114-
THEN,
115-
TO,
116-
TRIANGLE,
117-
UDPBIND,
118-
UDPUNBIND,
119-
UDPWRITE,
120-
UNTIL,
121-
WHILE,
122-
WRITE,
123-
YIELD,
124-
-1,
125-
};
37+
TOKEN(EMIT_FROM_FLAG)
38+
static const int kw_sentinel __attribute__((section(".kw.~zzzzzz"), used)) = -1;
39+
extern const int __start_kw_array[];
40+
extern const int __stop_kw_array[];
41+
static const int *keywords = __start_kw_array;
12642

12743
static int singlechar(struct basic_ctx* ctx)
12844
{

0 commit comments

Comments
 (0)