Skip to content

Commit 75cdc97

Browse files
committed
Support .rodata/.bss sections in ELF generation
This commit adds infrastructure for the .rodata (read-only data) and .bss (uninitialized data) sections and implements OP_load_rodata_address for addressing constants in .rodata. It then moves string literals from the .data section to the .rodata section for better memory protection. TODO: implement optimizations for global variable initialization with compile-time constants.
1 parent 301a225 commit 75cdc97

File tree

10 files changed

+137
-42
lines changed

10 files changed

+137
-42
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,6 @@ int fib(int n) def int @fib(int %n)
237237
238238
## Known Issues
239239
240-
1. The generated ELF lacks .bss and .rodata sections
241240
2. Full `<stdarg.h>` support is not available. Variadic functions work via direct pointer arithmetic.
242241
See the `printf` implementation in `lib/c.c` for the supported approach.
243242
3. The C front-end operates directly on token streams without building a full AST.

src/arm-codegen.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
9898
elf_offset += 116;
9999
return;
100100
case OP_load_data_address:
101+
case OP_load_rodata_address:
101102
elf_offset += 8;
102103
return;
103104
case OP_address_of_func:
@@ -190,7 +191,7 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
190191
func_t *func;
191192
const int rd = ph2_ir->dest;
192193
const int rn = ph2_ir->src0;
193-
const int rm = ph2_ir->src1;
194+
int rm = ph2_ir->src1; /* Not const because OP_trunc modifies it */
194195
int ofs;
195196

196197
/* Prepare this variable to reuse code for:
@@ -288,6 +289,10 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
288289
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));
289290
emit(__movt(__AL, rd, ph2_ir->src0 + elf_data_start));
290291
return;
292+
case OP_load_rodata_address:
293+
emit(__movw(__AL, rd, ph2_ir->src0 + elf_rodata_start));
294+
emit(__movt(__AL, rd, ph2_ir->src0 + elf_rodata_start));
295+
return;
291296
case OP_address_of_func:
292297
func = find_func(ph2_ir->func_name);
293298
ofs = elf_code_start + func->bbs->elf_offset;
@@ -310,7 +315,7 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
310315
emit(__movt(__AL, __r8, ph2_ir->src1 + 4));
311316
emit(__add_r(__AL, __sp, __sp, __r8));
312317
emit(__lw(__AL, __lr, __sp, -4));
313-
emit(__blx(__AL, __lr));
318+
emit(__bx(__AL, __lr));
314319
return;
315320
case OP_add:
316321
emit(__add_r(__AL, rd, rn, rm));
@@ -450,6 +455,8 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
450455
void code_generate(void)
451456
{
452457
elf_data_start = elf_code_start + elf_offset;
458+
elf_rodata_start = elf_data_start + elf_data->size;
459+
elf_bss_start = elf_rodata_start + elf_rodata->size;
453460

454461
/* start */
455462
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
@@ -477,7 +484,7 @@ void code_generate(void)
477484
emit(__mov_r(__AL, __r4, __r5));
478485
emit(__mov_r(__AL, __r5, __r6));
479486
emit(__svc());
480-
emit(__mov_r(__AL, __pc, __lr));
487+
emit(__bx(__AL, __lr));
481488

482489
ph2_ir_t *ph2_ir;
483490
for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;

src/defs.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,9 @@ typedef enum {
261261

262262
OP_allocat, /* allocate space on stack */
263263
OP_assign,
264-
OP_load_constant, /* load constant */
265-
OP_load_data_address, /* lookup address of a constant in data section */
264+
OP_load_constant, /* load constant */
265+
OP_load_data_address, /* lookup address of a constant in data section */
266+
OP_load_rodata_address, /* lookup address of a constant in rodata section */
266267

267268
/* control flow */
268269
OP_branch, /* conditional jump */

src/elf.c

Lines changed: 82 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,7 @@ void elf_generate_header(void)
6464
}
6565

6666
elf32_hdr_t hdr;
67-
/*
68-
* The following table explains the meaning of each field in the
67+
/* The following table explains the meaning of each field in the
6968
* ELF32 file header.
7069
*
7170
* Notice that the following values are hexadecimal.
@@ -134,26 +133,34 @@ void elf_generate_header(void)
134133
hdr.e_version = 1; /* ELF version */
135134
hdr.e_entry = ELF_START + elf_header_len; /* entry point */
136135
hdr.e_phoff = 0x34; /* program header offset */
137-
hdr.e_shoff = elf_header_len + elf_code->size + elf_data->size + 39 +
138-
elf_symtab->size +
139-
elf_strtab->size; /* section header offset */
140-
hdr.e_flags = ELF_FLAGS; /* flags */
141-
hdr.e_ehsize[0] = (char) 0x34; /* header size */
136+
/* Section header offset: The section headers come after symtab, strtab, and
137+
* shstrtab which are all written as part of elf_section buffer.
138+
* shstrtab size = 1 (null) + 10 (.shstrtab\0) + 6 (.text\0) + 6 (.data\0) +
139+
* 8 (.rodata\0) + 5 (.bss\0) + 8 (.symtab\0) + 8
140+
* (.strtab\0) + 1 (padding) = 53
141+
*/
142+
const int shstrtab_size = 53; /* section header string table with padding */
143+
hdr.e_shoff = elf_header_len + elf_code->size + elf_data->size +
144+
elf_rodata->size + elf_symtab->size + elf_strtab->size +
145+
shstrtab_size;
146+
hdr.e_flags = ELF_FLAGS; /* flags */
147+
hdr.e_ehsize[0] = (char) 0x34; /* header size */
142148
hdr.e_ehsize[1] = 0;
143149
hdr.e_phentsize[0] = (char) 0x20; /* program header size */
144150
hdr.e_phentsize[1] = 0;
145151
hdr.e_phnum[0] = 1; /* number of program headers */
146152
hdr.e_phnum[1] = 0;
147153
hdr.e_shentsize[0] = (char) 0x28; /* section header size */
148154
hdr.e_shentsize[1] = 0;
149-
hdr.e_shnum[0] = 6; /* number of section headers */
155+
/* number of section headers: .rodata and .bss included */
156+
hdr.e_shnum[0] = 8;
150157
hdr.e_shnum[1] = 0;
151-
hdr.e_shstrndx[0] = 5; /* section index with names */
158+
/* section index with names: updated for new sections */
159+
hdr.e_shstrndx[0] = 7;
152160
hdr.e_shstrndx[1] = 0;
153161
elf_write_blk(elf_header, &hdr, sizeof(elf32_hdr_t));
154162

155-
/*
156-
* Explain the meaning of each field in the ELF32 program header.
163+
/* Explain the meaning of each field in the ELF32 program header.
157164
*
158165
* | Program | |
159166
* & | Header bytes | Explanation |
@@ -176,14 +183,16 @@ void elf_generate_header(void)
176183
*/
177184
/* program header - code and data combined */
178185
elf32_phdr_t phdr;
179-
phdr.p_type = 1; /* PT_LOAD */
180-
phdr.p_offset = elf_header_len; /* offset of segment */
181-
phdr.p_vaddr = ELF_START + elf_header_len; /* virtual address */
182-
phdr.p_paddr = ELF_START + elf_header_len; /* physical address */
183-
phdr.p_filesz = elf_code->size + elf_data->size; /* size in file */
184-
phdr.p_memsz = elf_code->size + elf_data->size; /* size in memory */
185-
phdr.p_flags = 7; /* flags */
186-
phdr.p_align = 4; /* alignment */
186+
phdr.p_type = 1; /* PT_LOAD */
187+
phdr.p_offset = elf_header_len; /* offset of segment */
188+
phdr.p_vaddr = ELF_START + elf_header_len; /* virtual address */
189+
phdr.p_paddr = ELF_START + elf_header_len; /* physical address */
190+
phdr.p_filesz = elf_code->size + elf_data->size +
191+
elf_rodata->size; /* size in file - includes .rodata */
192+
phdr.p_memsz = elf_code->size + elf_data->size + elf_rodata->size +
193+
elf_bss_size; /* size in memory - includes .bss */
194+
phdr.p_flags = 7; /* flags */
195+
phdr.p_align = 4; /* alignment */
187196
elf_write_blk(elf_header, &phdr, sizeof(elf32_phdr_t));
188197
}
189198

@@ -195,26 +204,39 @@ void elf_generate_sections(void)
195204
return;
196205
}
197206

207+
int section_data_size = 0;
208+
int shstrtab_start = 0; /* Track start of shstrtab */
209+
198210
/* symtab section */
199211
for (int b = 0; b < elf_symtab->size; b++)
200212
elf_write_byte(elf_section, elf_symtab->elements[b]);
213+
section_data_size += elf_symtab->size;
201214

202215
/* strtab section */
203216
for (int b = 0; b < elf_strtab->size; b++)
204217
elf_write_byte(elf_section, elf_strtab->elements[b]);
218+
section_data_size += elf_strtab->size;
205219

206-
/* shstr section; len = 39 */
220+
/* shstr section - compute size dynamically */
221+
shstrtab_start = elf_section->size;
207222
elf_write_byte(elf_section, 0);
208223
elf_write_str(elf_section, ".shstrtab");
209224
elf_write_byte(elf_section, 0);
210225
elf_write_str(elf_section, ".text");
211226
elf_write_byte(elf_section, 0);
212227
elf_write_str(elf_section, ".data");
213228
elf_write_byte(elf_section, 0);
229+
elf_write_str(elf_section, ".rodata");
230+
elf_write_byte(elf_section, 0);
231+
elf_write_str(elf_section, ".bss");
232+
elf_write_byte(elf_section, 0);
214233
elf_write_str(elf_section, ".symtab");
215234
elf_write_byte(elf_section, 0);
216235
elf_write_str(elf_section, ".strtab");
217236
elf_write_byte(elf_section, 0);
237+
/* Add padding byte for alignment - some tools expect this */
238+
elf_write_byte(elf_section, 0);
239+
int shstrtab_size = elf_section->size - shstrtab_start;
218240

219241
/* section header table */
220242
elf32_shdr_t shdr;
@@ -288,22 +310,51 @@ void elf_generate_sections(void)
288310
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
289311
ofs += elf_data->size;
290312

313+
/* .rodata */
314+
shdr.sh_name = 0x17; /* Offset in shstrtab for ".rodata" */
315+
shdr.sh_type = 1; /* SHT_PROGBITS */
316+
shdr.sh_flags = 2; /* SHF_ALLOC only (read-only) */
317+
shdr.sh_addr = elf_code_start + elf_code->size + elf_data->size;
318+
shdr.sh_offset = ofs;
319+
shdr.sh_size = elf_rodata->size;
320+
shdr.sh_link = 0;
321+
shdr.sh_info = 0;
322+
shdr.sh_addralign = 4;
323+
shdr.sh_entsize = 0;
324+
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
325+
ofs += elf_rodata->size;
326+
327+
/* .bss */
328+
shdr.sh_name = 0x1f; /* Offset in shstrtab for ".bss" */
329+
shdr.sh_type = 8; /* SHT_NOBITS */
330+
shdr.sh_flags = 3; /* SHF_ALLOC | SHF_WRITE */
331+
shdr.sh_addr =
332+
elf_code_start + elf_code->size + elf_data->size + elf_rodata->size;
333+
shdr.sh_offset = ofs; /* File offset (not actually used for NOBITS) */
334+
shdr.sh_size = elf_bss_size;
335+
shdr.sh_link = 0;
336+
shdr.sh_info = 0;
337+
shdr.sh_addralign = 4;
338+
shdr.sh_entsize = 0;
339+
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
340+
/* Note: .bss is not written to file (SHT_NOBITS) */
341+
291342
/* .symtab */
292-
shdr.sh_name = 0x17;
343+
shdr.sh_name = 0x24; /* Updated offset for ".symtab" */
293344
shdr.sh_type = 2;
294345
shdr.sh_flags = 0;
295346
shdr.sh_addr = 0;
296347
shdr.sh_offset = ofs;
297348
shdr.sh_size = elf_symtab->size;
298-
shdr.sh_link = 4;
349+
shdr.sh_link = 6; /* Link to .strtab (section 6) */
299350
shdr.sh_info = elf_symbol_index;
300351
shdr.sh_addralign = 4;
301352
shdr.sh_entsize = 16;
302353
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
303354
ofs += elf_symtab->size;
304355

305356
/* .strtab */
306-
shdr.sh_name = 0x1f;
357+
shdr.sh_name = 0x2c; /* Updated offset for ".strtab" */
307358
shdr.sh_type = 3;
308359
shdr.sh_flags = 0;
309360
shdr.sh_addr = 0;
@@ -322,7 +373,7 @@ void elf_generate_sections(void)
322373
shdr.sh_flags = 0;
323374
shdr.sh_addr = 0;
324375
shdr.sh_offset = ofs;
325-
shdr.sh_size = 39;
376+
shdr.sh_size = shstrtab_size; /* Computed dynamically */
326377
shdr.sh_link = 0;
327378
shdr.sh_info = 0;
328379
shdr.sh_addralign = 1;
@@ -333,14 +384,17 @@ void elf_generate_sections(void)
333384
void elf_align(void)
334385
{
335386
/* Check for null pointers to prevent crashes */
336-
if (!elf_data || !elf_symtab || !elf_strtab) {
387+
if (!elf_data || !elf_rodata || !elf_symtab || !elf_strtab) {
337388
error("ELF buffers not initialized for alignment");
338389
return;
339390
}
340391

341392
while (elf_data->size & 3)
342393
elf_write_byte(elf_data, 0);
343394

395+
while (elf_rodata->size & 3)
396+
elf_write_byte(elf_rodata, 0);
397+
344398
while (elf_symtab->size & 3)
345399
elf_write_byte(elf_symtab, 0);
346400

@@ -387,6 +441,9 @@ void elf_generate(const char *outfile)
387441
fputc(elf_code->elements[i], fp);
388442
for (int i = 0; i < elf_data->size; i++)
389443
fputc(elf_data->elements[i], fp);
444+
for (int i = 0; i < elf_rodata->size; i++)
445+
fputc(elf_rodata->elements[i], fp);
446+
/* Note: .bss is not written to file (SHT_NOBITS) */
390447
for (int i = 0; i < elf_section->size; i++)
391448
fputc(elf_section->elements[i], fp);
392449
fclose(fp);

src/globals.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,17 @@ hashmap_t *INCLUSION_MAP;
9393
/* ELF sections */
9494
strbuf_t *elf_code;
9595
strbuf_t *elf_data;
96+
strbuf_t *elf_rodata;
9697
strbuf_t *elf_header;
9798
strbuf_t *elf_symtab;
9899
strbuf_t *elf_strtab;
99100
strbuf_t *elf_section;
100101
int elf_header_len = 0x54; /* ELF fixed: 0x34 + 1 * 0x20 */
101102
int elf_code_start;
102103
int elf_data_start;
104+
int elf_rodata_start;
105+
int elf_bss_start;
106+
int elf_bss_size;
103107

104108
/* Create a new arena block with given capacity.
105109
* @capacity: The capacity of the arena block. Must be positive.
@@ -1227,10 +1231,12 @@ void global_init(void)
12271231

12281232
elf_code = strbuf_create(MAX_CODE);
12291233
elf_data = strbuf_create(MAX_DATA);
1234+
elf_rodata = strbuf_create(MAX_DATA);
12301235
elf_header = strbuf_create(MAX_HEADER);
12311236
elf_symtab = strbuf_create(MAX_SYMTAB);
12321237
elf_strtab = strbuf_create(MAX_STRTAB);
12331238
elf_section = strbuf_create(MAX_SECTION);
1239+
elf_bss_size = 0;
12341240
}
12351241

12361242
/* Forward declaration for lexer cleanup */
@@ -1350,6 +1356,7 @@ void global_release(void)
13501356
strbuf_free(SOURCE);
13511357
strbuf_free(elf_code);
13521358
strbuf_free(elf_data);
1359+
strbuf_free(elf_rodata);
13531360
strbuf_free(elf_header);
13541361
strbuf_free(elf_symtab);
13551362
strbuf_free(elf_strtab);
@@ -1452,6 +1459,11 @@ void dump_bb_insn(func_t *func, basic_block_t *bb, bool *at_func_start)
14521459
/* offset from .data section */
14531460
printf("%%%s = .data (%d)", rd->var_name, rd->init_val);
14541461
break;
1462+
case OP_load_rodata_address:
1463+
print_indent(1);
1464+
/* offset from .rodata section */
1465+
printf("%%%s = .rodata (%d)", rd->var_name, rd->init_val);
1466+
break;
14551467
case OP_address_of:
14561468
print_indent(1);
14571469
printf("%%%s = &(%%%s)", rd->var_name, rs1->var_name);

src/parser.c

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,10 @@ void read_expr(block_t *parent, basic_block_t **bb);
132132

133133
int write_symbol(const char *data)
134134
{
135-
const int start_len = elf_data->size;
136-
elf_write_str(elf_data, data);
137-
elf_write_byte(elf_data, 0);
135+
/* Write string literals to .rodata section */
136+
const int start_len = elf_rodata->size;
137+
elf_write_str(elf_rodata, data);
138+
elf_write_byte(elf_rodata, 0);
138139
return start_len;
139140
}
140141

@@ -1333,7 +1334,7 @@ void read_parameter_list_decl(func_t *func, bool anon)
13331334
lex_accept(T_comma);
13341335
}
13351336

1336-
while (lex_peek(T_identifier, NULL) == 1 || lex_peek(T_const, NULL)) {
1337+
while (lex_peek(T_identifier, NULL) || lex_peek(T_const, NULL)) {
13371338
/* Check for const qualifier */
13381339
bool is_const = false;
13391340
if (lex_accept(T_const))
@@ -1381,7 +1382,8 @@ void read_literal_param(block_t *parent, basic_block_t *bb)
13811382
gen_name_to(vd->var_name);
13821383
vd->init_val = index;
13831384
opstack_push(vd);
1384-
add_insn(parent, bb, OP_load_data_address, vd, NULL, NULL, 0, NULL);
1385+
/* String literals are now in .rodata section */
1386+
add_insn(parent, bb, OP_load_rodata_address, vd, NULL, NULL, 0, NULL);
13851387
}
13861388

13871389
void read_numeric_param(block_t *parent, basic_block_t *bb, bool is_neg)
@@ -1747,9 +1749,8 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
17471749

17481750
if (lex_accept(T_minus)) {
17491751
is_neg = true;
1750-
if (lex_peek(T_numeric, NULL) == 0 &&
1751-
lex_peek(T_identifier, NULL) == 0 &&
1752-
lex_peek(T_open_bracket, NULL) == 0) {
1752+
if (!lex_peek(T_numeric, NULL) && !lex_peek(T_identifier, NULL) &&
1753+
!lex_peek(T_open_bracket, NULL)) {
17531754
error("Unexpected token after unary minus");
17541755
}
17551756
}
@@ -3655,9 +3656,9 @@ bool read_global_assignment(char *token)
36553656
if (var) {
36563657
if (lex_peek(T_string, NULL)) {
36573658
/* String literal global initialization:
3658-
* Current implementation stores strings inline rather than in
3659-
* '.rodata'. Pointer vs array semantics handled by assignment logic
3660-
* below. mutate the size of var here.
3659+
* String literals are now stored in .rodata section.
3660+
* TODO: Full support for global pointer initialization with
3661+
* rodata addresses requires compile-time address resolution.
36613662
*/
36623663
read_literal_param(parent, bb);
36633664
rs1 = opstack_pop();

0 commit comments

Comments
 (0)