Skip to content

Commit aed99a8

Browse files
committed
Add .rodata section support for string literals
This implements proper ELF .rodata (read-only data) section support, completing the ELF section structure alongside existing .bss and .data sections.

- Update ELF generation to include .rodata section (8 sections total)
- Add .rodata section header with read-only flags (SHF_ALLOC, no SHF_WRITE)
- Include .rodata in program header file size calculations
- Update section offsets and indices throughout ELF generation

The implementation ensures string literals are now stored in read-only memory, improving memory protection and adhering to standard ELF conventions.
1 parent c38ee81 commit aed99a8

File tree

8 files changed

+140
-44
lines changed

8 files changed

+140
-44
lines changed

README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -237,11 +237,10 @@ int fib(int n) def int @fib(int %n)
237237
238238
## Known Issues
239239
240-
1. The generated ELF lacks .bss and .rodata sections
241-
2. Full `<stdarg.h>` support is not available. Variadic functions work via direct pointer arithmetic.
240+
1. Full `<stdarg.h>` support is not available. Variadic functions work via direct pointer arithmetic.
242241
See the `printf` implementation in `lib/c.c` for the supported approach.
243-
3. The C front-end operates directly on token streams without building a full AST.
244-
4. Complex pointer arithmetic expressions like `*(p + offset)` have limited support.
242+
2. The C front-end operates directly on token streams without building a full AST.
243+
3. Complex pointer arithmetic expressions like `*(p + offset)` have limited support.
245244
246245
## License
247246

src/arm-codegen.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,9 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
285285
emit(__bl(__AL, func->bbs->elf_offset - elf_code->size));
286286
return;
287287
case OP_load_data_address:
288-
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));
289-
emit(__movt(__AL, rd, ph2_ir->src0 + elf_data_start));
288+
/* String literals are now in .rodata section */
289+
emit(__movw(__AL, rd, ph2_ir->src0 + elf_rodata_start));
290+
emit(__movt(__AL, rd, ph2_ir->src0 + elf_rodata_start));
290291
return;
291292
case OP_address_of_func:
292293
func = find_func(ph2_ir->func_name);
@@ -450,8 +451,10 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
450451
void code_generate(void)
451452
{
452453
elf_data_start = elf_code_start + elf_offset;
454+
elf_rodata_start = elf_data_start + elf_data->size;
455+
elf_bss_start = elf_rodata_start + elf_rodata->size;
453456

454-
/* start */
457+
/* start - allocate space for global variables */
455458
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
456459
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
457460
emit(__sub_r(__AL, __sp, __sp, __r8));
@@ -486,6 +489,7 @@ void code_generate(void)
486489

487490
/* prepare 'argc' and 'argv', then proceed to 'main' function */
488491
if (MAIN_BB) {
492+
/* argc and argv are saved above globals */
489493
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
490494
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
491495
emit(__add_r(__AL, __r8, __r12, __r8));

src/defs.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,8 @@ struct var {
353353
int ptr_level;
354354
bool is_func;
355355
bool is_global;
356-
bool address_taken; /* true if variable address was taken (&var) */
356+
bool is_initialized; /* true if global variable has initialization */
357+
bool address_taken; /* true if variable address was taken (&var) */
357358
int array_size;
358359
int array_dim1, array_dim2; /* first/second dimension size for 2D arrays */
359360
int offset; /* offset from stack or frame, index 0 is reserved */

src/elf.c

Lines changed: 63 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,9 @@ void elf_generate_header(void)
134134
hdr.e_version = 1; /* ELF version */
135135
hdr.e_entry = ELF_START + elf_header_len; /* entry point */
136136
hdr.e_phoff = 0x34; /* program header offset */
137-
hdr.e_shoff = elf_header_len + elf_code->size + elf_data->size + 39 +
138-
elf_symtab->size +
137+
/* Compute section header offset dynamically */
138+
hdr.e_shoff = elf_header_len + elf_code->size + elf_data->size +
139+
elf_rodata->size + elf_symtab->size +
139140
elf_strtab->size; /* section header offset */
140141
hdr.e_flags = ELF_FLAGS; /* flags */
141142
hdr.e_ehsize[0] = (char) 0x34; /* header size */
@@ -146,9 +147,9 @@ void elf_generate_header(void)
146147
hdr.e_phnum[1] = 0;
147148
hdr.e_shentsize[0] = (char) 0x28; /* section header size */
148149
hdr.e_shentsize[1] = 0;
149-
hdr.e_shnum[0] = 6; /* number of section headers */
150+
hdr.e_shnum[0] = 8; /* number of section headers - added .bss and .rodata */
150151
hdr.e_shnum[1] = 0;
151-
hdr.e_shstrndx[0] = 5; /* section index with names */
152+
hdr.e_shstrndx[0] = 7; /* section index with names */
152153
hdr.e_shstrndx[1] = 0;
153154
elf_write_blk(elf_header, &hdr, sizeof(elf32_hdr_t));
154155

@@ -176,14 +177,18 @@ void elf_generate_header(void)
176177
*/
177178
/* program header - code and data combined */
178179
elf32_phdr_t phdr;
179-
phdr.p_type = 1; /* PT_LOAD */
180-
phdr.p_offset = elf_header_len; /* offset of segment */
181-
phdr.p_vaddr = ELF_START + elf_header_len; /* virtual address */
182-
phdr.p_paddr = ELF_START + elf_header_len; /* physical address */
183-
phdr.p_filesz = elf_code->size + elf_data->size; /* size in file */
184-
phdr.p_memsz = elf_code->size + elf_data->size; /* size in memory */
185-
phdr.p_flags = 7; /* flags */
186-
phdr.p_align = 4; /* alignment */
180+
phdr.p_type = 1; /* PT_LOAD */
181+
phdr.p_offset = elf_header_len; /* offset of segment */
182+
phdr.p_vaddr = ELF_START + elf_header_len; /* virtual address */
183+
phdr.p_paddr = ELF_START + elf_header_len; /* physical address */
184+
/* size in file */
185+
phdr.p_filesz = elf_code->size + elf_data->size + elf_rodata->size;
186+
/* Include .bss size in memory but not in file size */
187+
phdr.p_memsz =
188+
elf_code->size + elf_data->size + elf_rodata->size + elf_bss_size;
189+
/* PF_X | PF_W | PF_R (all permissions for compatibility) */
190+
phdr.p_flags = 7;
191+
phdr.p_align = 4; /* alignment */
187192
elf_write_blk(elf_header, &phdr, sizeof(elf32_phdr_t));
188193
}
189194

@@ -203,18 +208,24 @@ void elf_generate_sections(void)
203208
for (int b = 0; b < elf_strtab->size; b++)
204209
elf_write_byte(elf_section, elf_strtab->elements[b]);
205210

206-
/* shstr section; len = 39 */
211+
/* shstr section - compute size dynamically */
212+
int shstrtab_start = elf_section->size;
207213
elf_write_byte(elf_section, 0);
208214
elf_write_str(elf_section, ".shstrtab");
209215
elf_write_byte(elf_section, 0);
210216
elf_write_str(elf_section, ".text");
211217
elf_write_byte(elf_section, 0);
212218
elf_write_str(elf_section, ".data");
213219
elf_write_byte(elf_section, 0);
220+
elf_write_str(elf_section, ".rodata");
221+
elf_write_byte(elf_section, 0);
222+
elf_write_str(elf_section, ".bss");
223+
elf_write_byte(elf_section, 0);
214224
elf_write_str(elf_section, ".symtab");
215225
elf_write_byte(elf_section, 0);
216226
elf_write_str(elf_section, ".strtab");
217227
elf_write_byte(elf_section, 0);
228+
int shstrtab_size = elf_section->size - shstrtab_start;
218229

219230
/* section header table */
220231
elf32_shdr_t shdr;
@@ -288,22 +299,51 @@ void elf_generate_sections(void)
288299
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
289300
ofs += elf_data->size;
290301

291-
/* .symtab */
302+
/* .rodata */
292303
shdr.sh_name = 0x17;
304+
shdr.sh_type = 1;
305+
shdr.sh_flags = 2; /* SHF_ALLOC only, no SHF_WRITE */
306+
shdr.sh_addr = elf_code_start + elf_code->size + elf_data->size;
307+
shdr.sh_offset = ofs;
308+
shdr.sh_size = elf_rodata->size;
309+
shdr.sh_link = 0;
310+
shdr.sh_info = 0;
311+
shdr.sh_addralign = 4;
312+
shdr.sh_entsize = 0;
313+
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
314+
ofs += elf_rodata->size;
315+
316+
/* .bss */
317+
shdr.sh_name = 0x1f;
318+
shdr.sh_type = 8; /* SHT_NOBITS */
319+
shdr.sh_flags = 3; /* SHF_WRITE | SHF_ALLOC */
320+
shdr.sh_addr =
321+
elf_code_start + elf_code->size + elf_data->size + elf_rodata->size;
322+
shdr.sh_offset = ofs; /* No file space for .bss */
323+
shdr.sh_size = elf_bss_size;
324+
shdr.sh_link = 0;
325+
shdr.sh_info = 0;
326+
shdr.sh_addralign = 4;
327+
shdr.sh_entsize = 0;
328+
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
329+
/* Note: .bss doesn't increase file offset */
330+
331+
/* .symtab */
332+
shdr.sh_name = 0x24;
293333
shdr.sh_type = 2;
294334
shdr.sh_flags = 0;
295335
shdr.sh_addr = 0;
296336
shdr.sh_offset = ofs;
297337
shdr.sh_size = elf_symtab->size;
298-
shdr.sh_link = 4;
338+
shdr.sh_link = 6;
299339
shdr.sh_info = elf_symbol_index;
300340
shdr.sh_addralign = 4;
301341
shdr.sh_entsize = 16;
302342
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
303343
ofs += elf_symtab->size;
304344

305345
/* .strtab */
306-
shdr.sh_name = 0x1f;
346+
shdr.sh_name = 0x2c;
307347
shdr.sh_type = 3;
308348
shdr.sh_flags = 0;
309349
shdr.sh_addr = 0;
@@ -322,7 +362,7 @@ void elf_generate_sections(void)
322362
shdr.sh_flags = 0;
323363
shdr.sh_addr = 0;
324364
shdr.sh_offset = ofs;
325-
shdr.sh_size = 39;
365+
shdr.sh_size = shstrtab_size;
326366
shdr.sh_link = 0;
327367
shdr.sh_info = 0;
328368
shdr.sh_addralign = 1;
@@ -333,14 +373,17 @@ void elf_generate_sections(void)
333373
void elf_align(void)
334374
{
335375
/* Check for null pointers to prevent crashes */
336-
if (!elf_data || !elf_symtab || !elf_strtab) {
376+
if (!elf_data || !elf_rodata || !elf_symtab || !elf_strtab) {
337377
error("ELF buffers not initialized for alignment");
338378
return;
339379
}
340380

341381
while (elf_data->size & 3)
342382
elf_write_byte(elf_data, 0);
343383

384+
while (elf_rodata->size & 3)
385+
elf_write_byte(elf_rodata, 0);
386+
344387
while (elf_symtab->size & 3)
345388
elf_write_byte(elf_symtab, 0);
346389

@@ -387,6 +430,8 @@ void elf_generate(char *outfile)
387430
fputc(elf_code->elements[i], fp);
388431
for (int i = 0; i < elf_data->size; i++)
389432
fputc(elf_data->elements[i], fp);
433+
for (int i = 0; i < elf_rodata->size; i++)
434+
fputc(elf_rodata->elements[i], fp);
390435
for (int i = 0; i < elf_section->size; i++)
391436
fputc(elf_section->elements[i], fp);
392437
fclose(fp);

src/globals.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,18 @@ hashmap_t *INCLUSION_MAP;
9393
/* ELF sections */
9494
strbuf_t *elf_code;
9595
strbuf_t *elf_data;
96+
strbuf_t *elf_rodata;
97+
strbuf_t *elf_bss;
9698
strbuf_t *elf_header;
9799
strbuf_t *elf_symtab;
98100
strbuf_t *elf_strtab;
99101
strbuf_t *elf_section;
100102
int elf_header_len = 0x54; /* ELF fixed: 0x34 + 1 * 0x20 */
101103
int elf_code_start;
102104
int elf_data_start;
105+
int elf_rodata_start;
106+
int elf_bss_start;
107+
int elf_bss_size;
103108

104109
/* Create a new arena block with given capacity.
105110
* @capacity: The capacity of the arena block. Must be positive.
@@ -1187,6 +1192,9 @@ void strbuf_free(strbuf_t *src)
11871192
void global_init(void)
11881193
{
11891194
elf_code_start = ELF_START + elf_header_len;
1195+
elf_data_start = elf_code_start; /* Will be updated after code section */
1196+
elf_rodata_start = elf_code_start; /* Will be updated after data section */
1197+
elf_bss_start = elf_code_start; /* Will be updated after rodata section */
11901198

11911199
MACROS_MAP = hashmap_create(MAX_ALIASES);
11921200

@@ -1227,10 +1235,13 @@ void global_init(void)
12271235

12281236
elf_code = strbuf_create(MAX_CODE);
12291237
elf_data = strbuf_create(MAX_DATA);
1238+
elf_rodata = strbuf_create(MAX_DATA);
1239+
elf_bss = strbuf_create(MAX_DATA);
12301240
elf_header = strbuf_create(MAX_HEADER);
12311241
elf_symtab = strbuf_create(MAX_SYMTAB);
12321242
elf_strtab = strbuf_create(MAX_STRTAB);
12331243
elf_section = strbuf_create(MAX_SECTION);
1244+
elf_bss_size = 0;
12341245
}
12351246

12361247
/* Forward declaration for lexer cleanup */
@@ -1350,6 +1361,8 @@ void global_release(void)
13501361
strbuf_free(SOURCE);
13511362
strbuf_free(elf_code);
13521363
strbuf_free(elf_data);
1364+
strbuf_free(elf_rodata);
1365+
strbuf_free(elf_bss);
13531366
strbuf_free(elf_header);
13541367
strbuf_free(elf_symtab);
13551368
strbuf_free(elf_strtab);

src/parser.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,10 @@ void read_expr(block_t *parent, basic_block_t **bb);
132132

133133
int write_symbol(char *data)
134134
{
135-
int start_len = elf_data->size;
136-
elf_write_str(elf_data, data);
137-
elf_write_byte(elf_data, 0);
135+
/* String literals go to .rodata section */
136+
int start_len = elf_rodata->size;
137+
elf_write_str(elf_rodata, data);
138+
elf_write_byte(elf_rodata, 0);
138139
return start_len;
139140
}
140141

@@ -1125,8 +1126,7 @@ void parse_array_init(var_t *var,
11251126
}
11261127
} else {
11271128
if (parent == GLOBAL_BLOCK) {
1128-
consume_global_constant_syntax();
1129-
val = NULL;
1129+
val = parse_global_constant_value(parent, bb);
11301130
} else {
11311131
read_expr(parent, bb);
11321132
read_ternary_operation(parent, bb);
@@ -4757,6 +4757,9 @@ void read_global_decl(block_t *block)
47574757

47584758
/* is a variable */
47594759
if (lex_accept(T_assign)) {
4760+
/* Mark variable as initialized since it has an initializer */
4761+
var->is_initialized = true;
4762+
47604763
/* If '{' follows and this is an array (explicit or implicit-size via
47614764
* pointer syntax), reuse the array initializer to emit per-element
47624765
* stores for globals as well.
@@ -4818,6 +4821,7 @@ void initialize_struct_field(var_t *nv, var_t *v, int offset)
48184821
nv->ptr_level = 0;
48194822
nv->is_func = false;
48204823
nv->is_global = false;
4824+
nv->is_initialized = false;
48214825
nv->array_size = 0;
48224826
nv->offset = offset;
48234827
nv->init_val = 0;

0 commit comments

Comments
 (0)