Skip to content

Commit 90db48b

Browse files
committed
Support .rodata/.bss sections for ELF generation
This commit adds infrastructure for the .rodata (read-only data) and .bss (uninitialized data) sections and implements OP_load_rodata_address for addressing constants in .rodata. It then moves string literals from the .data section to the .rodata section for better memory protection. TODO: implement optimizations for global variable initialization with compile-time constants.
1 parent 3493fd1 commit 90db48b

File tree

9 files changed

+135
-37
lines changed

9 files changed

+135
-37
lines changed

src/arm-codegen.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
9898
elf_offset += 116;
9999
return;
100100
case OP_load_data_address:
101+
case OP_load_rodata_address:
101102
elf_offset += 8;
102103
return;
103104
case OP_address_of_func:
@@ -190,7 +191,7 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
190191
func_t *func;
191192
const int rd = ph2_ir->dest;
192193
const int rn = ph2_ir->src0;
193-
const int rm = ph2_ir->src1;
194+
int rm = ph2_ir->src1; /* Not const because OP_trunc modifies it */
194195
int ofs;
195196

196197
/* Prepare this variable to reuse code for:
@@ -288,6 +289,10 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
288289
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));
289290
emit(__movt(__AL, rd, ph2_ir->src0 + elf_data_start));
290291
return;
292+
case OP_load_rodata_address:
293+
emit(__movw(__AL, rd, ph2_ir->src0 + elf_rodata_start));
294+
emit(__movt(__AL, rd, ph2_ir->src0 + elf_rodata_start));
295+
return;
291296
case OP_address_of_func:
292297
func = find_func(ph2_ir->func_name);
293298
ofs = elf_code_start + func->bbs->elf_offset;
@@ -310,7 +315,7 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
310315
emit(__movt(__AL, __r8, ph2_ir->src1 + 4));
311316
emit(__add_r(__AL, __sp, __sp, __r8));
312317
emit(__lw(__AL, __lr, __sp, -4));
313-
emit(__blx(__AL, __lr));
318+
emit(__bx(__AL, __lr));
314319
return;
315320
case OP_add:
316321
emit(__add_r(__AL, rd, rn, rm));
@@ -450,6 +455,8 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
450455
void code_generate(void)
451456
{
452457
elf_data_start = elf_code_start + elf_offset;
458+
elf_rodata_start = elf_data_start + elf_data->size;
459+
elf_bss_start = elf_rodata_start + elf_rodata->size;
453460

454461
/* start */
455462
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
@@ -477,7 +484,7 @@ void code_generate(void)
477484
emit(__mov_r(__AL, __r4, __r5));
478485
emit(__mov_r(__AL, __r5, __r6));
479486
emit(__svc());
480-
emit(__mov_r(__AL, __pc, __lr));
487+
emit(__bx(__AL, __lr));
481488

482489
ph2_ir_t *ph2_ir;
483490
for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;

src/defs.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,9 @@ typedef enum {
261261

262262
OP_allocat, /* allocate space on stack */
263263
OP_assign,
264-
OP_load_constant, /* load constant */
265-
OP_load_data_address, /* lookup address of a constant in data section */
264+
OP_load_constant, /* load constant */
265+
OP_load_data_address, /* lookup address of a constant in data section */
266+
OP_load_rodata_address, /* lookup address of a constant in rodata section */
266267

267268
/* control flow */
268269
OP_branch, /* conditional jump */
@@ -360,6 +361,8 @@ struct var {
360361
int array_dim1, array_dim2; /* first/second dimension size for 2D arrays */
361362
int offset; /* offset from stack or frame, index 0 is reserved */
362363
int init_val; /* for global initialization */
364+
/* true if global pointer initialized with rodata address */
365+
bool is_global_rodata_ref;
363366
int liveness; /* live range */
364367
int in_loop;
365368
struct var *base;

src/elf.c

Lines changed: 73 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,7 @@ void elf_generate_header(void)
6464
}
6565

6666
elf32_hdr_t hdr;
67-
/*
68-
* The following table explains the meaning of each field in the
67+
/* The following table explains the meaning of each field in the
6968
* ELF32 file header.
7069
*
7170
* Notice that the following values are hexadecimal.
@@ -134,26 +133,30 @@ void elf_generate_header(void)
134133
hdr.e_version = 1; /* ELF version */
135134
hdr.e_entry = ELF_START + elf_header_len; /* entry point */
136135
hdr.e_phoff = 0x34; /* program header offset */
137-
hdr.e_shoff = elf_header_len + elf_code->size + elf_data->size + 39 +
138-
elf_symtab->size +
139-
elf_strtab->size; /* section header offset */
140-
hdr.e_flags = ELF_FLAGS; /* flags */
141-
hdr.e_ehsize[0] = (char) 0x34; /* header size */
136+
/* Section header offset: The section headers come after symtab, strtab, and
137+
* shstrtab which are all written as part of elf_section buffer
138+
*/
139+
hdr.e_shoff = elf_header_len + elf_code->size + elf_data->size +
140+
elf_rodata->size + elf_symtab->size + elf_strtab->size + 53;
141+
/* 53 is shstrtab size with new section names */
142+
hdr.e_flags = ELF_FLAGS; /* flags */
143+
hdr.e_ehsize[0] = (char) 0x34; /* header size */
142144
hdr.e_ehsize[1] = 0;
143145
hdr.e_phentsize[0] = (char) 0x20; /* program header size */
144146
hdr.e_phentsize[1] = 0;
145147
hdr.e_phnum[0] = 1; /* number of program headers */
146148
hdr.e_phnum[1] = 0;
147149
hdr.e_shentsize[0] = (char) 0x28; /* section header size */
148150
hdr.e_shentsize[1] = 0;
149-
hdr.e_shnum[0] = 6; /* number of section headers */
151+
/* number of section headers: .rodata and .bss included */
152+
hdr.e_shnum[0] = 8;
150153
hdr.e_shnum[1] = 0;
151-
hdr.e_shstrndx[0] = 5; /* section index with names */
154+
/* section index with names: updated for new sections */
155+
hdr.e_shstrndx[0] = 7;
152156
hdr.e_shstrndx[1] = 0;
153157
elf_write_blk(elf_header, &hdr, sizeof(elf32_hdr_t));
154158

155-
/*
156-
* Explain the meaning of each field in the ELF32 program header.
159+
/* Explain the meaning of each field in the ELF32 program header.
157160
*
158161
* | Program | |
159162
* & | Header bytes | Explanation |
@@ -176,14 +179,16 @@ void elf_generate_header(void)
176179
*/
177180
/* program header - code and data combined */
178181
elf32_phdr_t phdr;
179-
phdr.p_type = 1; /* PT_LOAD */
180-
phdr.p_offset = elf_header_len; /* offset of segment */
181-
phdr.p_vaddr = ELF_START + elf_header_len; /* virtual address */
182-
phdr.p_paddr = ELF_START + elf_header_len; /* physical address */
183-
phdr.p_filesz = elf_code->size + elf_data->size; /* size in file */
184-
phdr.p_memsz = elf_code->size + elf_data->size; /* size in memory */
185-
phdr.p_flags = 7; /* flags */
186-
phdr.p_align = 4; /* alignment */
182+
phdr.p_type = 1; /* PT_LOAD */
183+
phdr.p_offset = elf_header_len; /* offset of segment */
184+
phdr.p_vaddr = ELF_START + elf_header_len; /* virtual address */
185+
phdr.p_paddr = ELF_START + elf_header_len; /* physical address */
186+
phdr.p_filesz = elf_code->size + elf_data->size +
187+
elf_rodata->size; /* size in file - includes .rodata */
188+
phdr.p_memsz = elf_code->size + elf_data->size + elf_rodata->size +
189+
elf_bss_size; /* size in memory - includes .bss */
190+
phdr.p_flags = 7; /* flags */
191+
phdr.p_align = 4; /* alignment */
187192
elf_write_blk(elf_header, &phdr, sizeof(elf32_phdr_t));
188193
}
189194

@@ -195,22 +200,30 @@ void elf_generate_sections(void)
195200
return;
196201
}
197202

203+
int section_data_size = 0;
204+
198205
/* symtab section */
199206
for (int b = 0; b < elf_symtab->size; b++)
200207
elf_write_byte(elf_section, elf_symtab->elements[b]);
208+
section_data_size += elf_symtab->size;
201209

202210
/* strtab section */
203211
for (int b = 0; b < elf_strtab->size; b++)
204212
elf_write_byte(elf_section, elf_strtab->elements[b]);
213+
section_data_size += elf_strtab->size;
205214

206-
/* shstr section; len = 39 */
215+
/* shstr section; len = 53 (was 39) - includes .rodata and .bss */
207216
elf_write_byte(elf_section, 0);
208217
elf_write_str(elf_section, ".shstrtab");
209218
elf_write_byte(elf_section, 0);
210219
elf_write_str(elf_section, ".text");
211220
elf_write_byte(elf_section, 0);
212221
elf_write_str(elf_section, ".data");
213222
elf_write_byte(elf_section, 0);
223+
elf_write_str(elf_section, ".rodata");
224+
elf_write_byte(elf_section, 0);
225+
elf_write_str(elf_section, ".bss");
226+
elf_write_byte(elf_section, 0);
214227
elf_write_str(elf_section, ".symtab");
215228
elf_write_byte(elf_section, 0);
216229
elf_write_str(elf_section, ".strtab");
@@ -288,22 +301,51 @@ void elf_generate_sections(void)
288301
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
289302
ofs += elf_data->size;
290303

304+
/* .rodata */
305+
shdr.sh_name = 0x17; /* Offset in shstrtab for ".rodata" */
306+
shdr.sh_type = 1; /* SHT_PROGBITS */
307+
shdr.sh_flags = 2; /* SHF_ALLOC only (read-only) */
308+
shdr.sh_addr = elf_code_start + elf_code->size + elf_data->size;
309+
shdr.sh_offset = ofs;
310+
shdr.sh_size = elf_rodata->size;
311+
shdr.sh_link = 0;
312+
shdr.sh_info = 0;
313+
shdr.sh_addralign = 4;
314+
shdr.sh_entsize = 0;
315+
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
316+
ofs += elf_rodata->size;
317+
318+
/* .bss */
319+
shdr.sh_name = 0x1f; /* Offset in shstrtab for ".bss" */
320+
shdr.sh_type = 8; /* SHT_NOBITS */
321+
shdr.sh_flags = 3; /* SHF_ALLOC | SHF_WRITE */
322+
shdr.sh_addr =
323+
elf_code_start + elf_code->size + elf_data->size + elf_rodata->size;
324+
shdr.sh_offset = ofs; /* File offset (not actually used for NOBITS) */
325+
shdr.sh_size = elf_bss_size;
326+
shdr.sh_link = 0;
327+
shdr.sh_info = 0;
328+
shdr.sh_addralign = 4;
329+
shdr.sh_entsize = 0;
330+
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
331+
/* Note: .bss is not written to file (SHT_NOBITS) */
332+
291333
/* .symtab */
292-
shdr.sh_name = 0x17;
334+
shdr.sh_name = 0x24; /* Updated offset for ".symtab" */
293335
shdr.sh_type = 2;
294336
shdr.sh_flags = 0;
295337
shdr.sh_addr = 0;
296338
shdr.sh_offset = ofs;
297339
shdr.sh_size = elf_symtab->size;
298-
shdr.sh_link = 4;
340+
shdr.sh_link = 6; /* Link to .strtab (section 6) */
299341
shdr.sh_info = elf_symbol_index;
300342
shdr.sh_addralign = 4;
301343
shdr.sh_entsize = 16;
302344
elf_write_blk(elf_section, &shdr, sizeof(elf32_shdr_t));
303345
ofs += elf_symtab->size;
304346

305347
/* .strtab */
306-
shdr.sh_name = 0x1f;
348+
shdr.sh_name = 0x2c; /* Updated offset for ".strtab" */
307349
shdr.sh_type = 3;
308350
shdr.sh_flags = 0;
309351
shdr.sh_addr = 0;
@@ -322,7 +364,7 @@ void elf_generate_sections(void)
322364
shdr.sh_flags = 0;
323365
shdr.sh_addr = 0;
324366
shdr.sh_offset = ofs;
325-
shdr.sh_size = 39;
367+
shdr.sh_size = 53; /* Updated from 39 to include new section names */
326368
shdr.sh_link = 0;
327369
shdr.sh_info = 0;
328370
shdr.sh_addralign = 1;
@@ -333,14 +375,17 @@ void elf_generate_sections(void)
333375
void elf_align(void)
334376
{
335377
/* Check for null pointers to prevent crashes */
336-
if (!elf_data || !elf_symtab || !elf_strtab) {
378+
if (!elf_data || !elf_rodata || !elf_symtab || !elf_strtab) {
337379
error("ELF buffers not initialized for alignment");
338380
return;
339381
}
340382

341383
while (elf_data->size & 3)
342384
elf_write_byte(elf_data, 0);
343385

386+
while (elf_rodata->size & 3)
387+
elf_write_byte(elf_rodata, 0);
388+
344389
while (elf_symtab->size & 3)
345390
elf_write_byte(elf_symtab, 0);
346391

@@ -387,6 +432,9 @@ void elf_generate(const char *outfile)
387432
fputc(elf_code->elements[i], fp);
388433
for (int i = 0; i < elf_data->size; i++)
389434
fputc(elf_data->elements[i], fp);
435+
for (int i = 0; i < elf_rodata->size; i++)
436+
fputc(elf_rodata->elements[i], fp);
437+
/* Note: .bss is not written to file (SHT_NOBITS) */
390438
for (int i = 0; i < elf_section->size; i++)
391439
fputc(elf_section->elements[i], fp);
392440
fclose(fp);

src/globals.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,17 @@ hashmap_t *INCLUSION_MAP;
9393
/* ELF sections */
9494
strbuf_t *elf_code;
9595
strbuf_t *elf_data;
96+
strbuf_t *elf_rodata;
9697
strbuf_t *elf_header;
9798
strbuf_t *elf_symtab;
9899
strbuf_t *elf_strtab;
99100
strbuf_t *elf_section;
100101
int elf_header_len = 0x54; /* ELF fixed: 0x34 + 1 * 0x20 */
101102
int elf_code_start;
102103
int elf_data_start;
104+
int elf_rodata_start;
105+
int elf_bss_start;
106+
int elf_bss_size;
103107

104108
/* Create a new arena block with given capacity.
105109
* @capacity: The capacity of the arena block. Must be positive.
@@ -1227,10 +1231,12 @@ void global_init(void)
12271231

12281232
elf_code = strbuf_create(MAX_CODE);
12291233
elf_data = strbuf_create(MAX_DATA);
1234+
elf_rodata = strbuf_create(MAX_DATA);
12301235
elf_header = strbuf_create(MAX_HEADER);
12311236
elf_symtab = strbuf_create(MAX_SYMTAB);
12321237
elf_strtab = strbuf_create(MAX_STRTAB);
12331238
elf_section = strbuf_create(MAX_SECTION);
1239+
elf_bss_size = 0;
12341240
}
12351241

12361242
/* Forward declaration for lexer cleanup */
@@ -1452,6 +1458,11 @@ void dump_bb_insn(func_t *func, basic_block_t *bb, bool *at_func_start)
14521458
/* offset from .data section */
14531459
printf("%%%s = .data (%d)", rd->var_name, rd->init_val);
14541460
break;
1461+
case OP_load_rodata_address:
1462+
print_indent(1);
1463+
/* offset from .rodata section */
1464+
printf("%%%s = .rodata (%d)", rd->var_name, rd->init_val);
1465+
break;
14551466
case OP_address_of:
14561467
print_indent(1);
14571468
printf("%%%s = &(%%%s)", rd->var_name, rs1->var_name);

src/parser.c

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,10 @@ void read_expr(block_t *parent, basic_block_t **bb);
132132

133133
int write_symbol(const char *data)
134134
{
135-
const int start_len = elf_data->size;
136-
elf_write_str(elf_data, data);
137-
elf_write_byte(elf_data, 0);
135+
/* Write string literals to .rodata section instead of .data */
136+
const int start_len = elf_rodata->size;
137+
elf_write_str(elf_rodata, data);
138+
elf_write_byte(elf_rodata, 0);
138139
return start_len;
139140
}
140141

@@ -1373,7 +1374,8 @@ void read_literal_param(block_t *parent, basic_block_t *bb)
13731374
gen_name_to(vd->var_name);
13741375
vd->init_val = index;
13751376
opstack_push(vd);
1376-
add_insn(parent, bb, OP_load_data_address, vd, NULL, NULL, 0, NULL);
1377+
/* String literals are now in .rodata section */
1378+
add_insn(parent, bb, OP_load_rodata_address, vd, NULL, NULL, 0, NULL);
13771379
}
13781380

13791381
void read_numeric_param(block_t *parent, basic_block_t *bb, bool is_neg)
@@ -3647,13 +3649,22 @@ bool read_global_assignment(char *token)
36473649
if (var) {
36483650
if (lex_peek(T_string, NULL)) {
36493651
/* String literal global initialization:
3650-
* Current implementation stores strings inline rather than in
3651-
* '.rodata'. Pointer vs array semantics handled by assignment logic
3652-
* below. mutate the size of var here.
3652+
* String literals are now stored in .rodata section.
3653+
* For global pointers, we need to store the runtime address
3654+
* of the string in the .data section.
36533655
*/
36543656
read_literal_param(parent, bb);
36553657
rs1 = opstack_pop();
36563658
vd = var;
3659+
3660+
/* For global pointer initialization with string literal,
3661+
* we need special handling to write the rodata address to .data
3662+
*/
3663+
if (var->ptr_level > 0) {
3664+
/* This is a pointer - store the address in .data */
3665+
var->init_val = rs1->init_val; /* Store rodata offset */
3666+
var->is_global_rodata_ref = true; /* Mark as rodata reference */
3667+
}
36573668
add_insn(parent, bb, OP_assign, vd, rs1, NULL, 0, NULL);
36583669
return true;
36593670
}

src/peephole.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ bool is_fusible_insn(ph2_ir_t *ph2_ir)
3434
case OP_load: /* Memory operations */
3535
case OP_global_load:
3636
case OP_load_data_address:
37+
case OP_load_rodata_address:
3738
return true;
3839
default:
3940
return false;

0 commit comments

Comments
 (0)