Skip to content

Commit cb5bd04

Browse files
author
icgmilk
committed
Reduced memory usage in live sets
Live sets in shecc are implemented as fixed-size arrays of pointers, which preallocate hundreds of unused slots and waste a significant amount of memory. This patch replaces those arrays with arena-backed vectors, reducing memory usage substantially. Although this change introduces an additional memcpy during dynamic resizing in "var_list_ensure_capacity", which may add overhead, the working set is much smaller, and the better cache locality and fewer minor page faults outweigh the amortized memcpy cost. Measured (compiling src/main.c): - RSS: ~1.24GB -> ~305MB (≈ 75.4% reduction) - Elapsed time: 0.50s -> 0.17s (≈ 66% less time) - Minor page faults: ~309k -> ~76k Changes include: - Replace live sets with "var_list_t" for dynamic resizing. - Add helper routines in "ssa.c" for managing "var_list_t" instances. - Update related logic in "reg-alloc.c" and "ssa.c".
1 parent 76e51ef commit cb5bd04

File tree

3 files changed

+73
-59
lines changed

3 files changed

+73
-59
lines changed

src/defs.h

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -535,14 +535,10 @@ struct basic_block {
535535
struct basic_block *r_idom;
536536
struct basic_block *rpo_next;
537537
struct basic_block *rpo_r_next;
538-
var_t *live_gen[MAX_ANALYSIS_STACK_SIZE];
539-
int live_gen_idx;
540-
var_t *live_kill[MAX_ANALYSIS_STACK_SIZE];
541-
int live_kill_idx;
542-
var_t *live_in[MAX_ANALYSIS_STACK_SIZE];
543-
int live_in_idx;
544-
var_t *live_out[MAX_ANALYSIS_STACK_SIZE];
545-
int live_out_idx;
538+
var_list_t live_gen;
539+
var_list_t live_kill;
540+
var_list_t live_in;
541+
var_list_t live_out;
546542
int rpo;
547543
int rpo_r;
548544
struct basic_block *DF[64];

src/reg-alloc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ int align_size(int i)
4747

4848
bool check_live_out(basic_block_t *bb, var_t *var)
4949
{
50-
for (int i = 0; i < bb->live_out_idx; i++) {
51-
if (bb->live_out[i] == var)
50+
for (int i = 0; i < bb->live_out.size; i++) {
51+
if (bb->live_out.elements[i] == var)
5252
return true;
5353
}
5454
return false;

src/ssa.c

Lines changed: 67 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,43 @@
2121
/* Dead store elimination window size */
2222
#define OVERWRITE_WINDOW 3
2323

24+
void var_list_ensure_capacity(var_list_t *list, int min_capacity)
25+
{
26+
if (list->capacity >= min_capacity)
27+
return;
28+
29+
int new_capacity = list->capacity ? list->capacity : HOST_PTR_SIZE;
30+
31+
while (new_capacity < min_capacity)
32+
new_capacity <<= 1;
33+
34+
var_t **new_elements = arena_alloc(BB_ARENA, new_capacity * HOST_PTR_SIZE);
35+
36+
if (list->elements)
37+
memcpy(new_elements, list->elements, list->size * HOST_PTR_SIZE);
38+
39+
list->elements = new_elements;
40+
list->capacity = new_capacity;
41+
}
42+
43+
void var_list_add_var(var_list_t *list, var_t *var)
44+
{
45+
for (int i = 0; i < list->size; i++) {
46+
if (list->elements[i] == var)
47+
return;
48+
}
49+
50+
var_list_ensure_capacity(list, list->size + 1);
51+
list->elements[list->size++] = var;
52+
}
53+
54+
void var_list_assign_array(var_list_t *list, var_t **data, int count)
55+
{
56+
var_list_ensure_capacity(list, count);
57+
memcpy(list->elements, data, count * HOST_PTR_SIZE);
58+
list->size = count;
59+
}
60+
2461
/* cfront does not accept structure as an argument, pass pointer */
2562
void bb_forward_traversal(bb_traversal_args_t *args)
2663
{
@@ -484,26 +521,16 @@ void use_chain_build(void)
484521

485522
bool var_check_killed(var_t *var, basic_block_t *bb)
486523
{
487-
for (int i = 0; i < bb->live_kill_idx; i++) {
488-
if (bb->live_kill[i] == var)
524+
for (int i = 0; i < bb->live_kill.size; i++) {
525+
if (bb->live_kill.elements[i] == var)
489526
return true;
490527
}
491528
return false;
492529
}
493530

494531
void bb_add_killed_var(basic_block_t *bb, var_t *var)
495532
{
496-
bool found = false;
497-
for (int i = 0; i < bb->live_kill_idx; i++) {
498-
if (bb->live_kill[i] == var) {
499-
found = true;
500-
break;
501-
}
502-
}
503-
if (found)
504-
return;
505-
506-
bb->live_kill[bb->live_kill_idx++] = var;
533+
var_list_add_var(&bb->live_kill, var);
507534
}
508535

509536
void var_add_killed_bb(var_t *var, basic_block_t *bb)
@@ -2475,7 +2502,7 @@ void build_reversed_rpo(void)
24752502
void bb_reset_live_kill_idx(func_t *func, basic_block_t *bb)
24762503
{
24772504
UNUSED(func);
2478-
bb->live_kill_idx = 0;
2505+
bb->live_kill.size = 0;
24792506
}
24802507

24812508
void add_live_gen(basic_block_t *bb, var_t *var);
@@ -2486,8 +2513,8 @@ void bb_reset_and_solve_locals(func_t *func, basic_block_t *bb)
24862513
{
24872514
UNUSED(func);
24882515

2489-
/* Reset live_kill index */
2490-
bb->live_kill_idx = 0;
2516+
/* Reset live_kill list */
2517+
bb->live_kill.size = 0;
24912518

24922519
/* Solve locals */
24932520
int i = 0;
@@ -2514,11 +2541,7 @@ void add_live_gen(basic_block_t *bb, var_t *var)
25142541
if (var->is_global)
25152542
return;
25162543

2517-
for (int i = 0; i < bb->live_gen_idx; i++) {
2518-
if (bb->live_gen[i] == var)
2519-
return;
2520-
}
2521-
bb->live_gen[bb->live_gen_idx++] = var;
2544+
var_list_add_var(&bb->live_gen, var);
25222545
}
25232546

25242547
void update_consumed(insn_t *insn, var_t *var)
@@ -2553,51 +2576,49 @@ void bb_solve_locals(func_t *func, basic_block_t *bb)
25532576

25542577
void add_live_in(basic_block_t *bb, var_t *var)
25552578
{
2556-
for (int i = 0; i < bb->live_in_idx; i++) {
2557-
if (bb->live_in[i] == var)
2558-
return;
2559-
}
2560-
bb->live_in[bb->live_in_idx++] = var;
2579+
var_list_add_var(&bb->live_in, var);
25612580
}
25622581

25632582
void compute_live_in(basic_block_t *bb)
25642583
{
2565-
bb->live_in_idx = 0;
2584+
bb->live_in.size = 0;
25662585

2567-
for (int i = 0; i < bb->live_out_idx; i++) {
2568-
if (var_check_killed(bb->live_out[i], bb))
2586+
for (int i = 0; i < bb->live_out.size; i++) {
2587+
var_t *var = bb->live_out.elements[i];
2588+
if (var_check_killed(var, bb))
25692589
continue;
2570-
add_live_in(bb, bb->live_out[i]);
2590+
add_live_in(bb, var);
25712591
}
2572-
for (int i = 0; i < bb->live_gen_idx; i++)
2573-
add_live_in(bb, bb->live_gen[i]);
2592+
for (int i = 0; i < bb->live_gen.size; i++)
2593+
add_live_in(bb, bb->live_gen.elements[i]);
25742594
}
25752595

25762596
int merge_live_in(var_t *live_out[], int live_out_idx, basic_block_t *bb)
25772597
{
25782598
/* Early exit for empty live_in */
2579-
if (bb->live_in_idx == 0)
2599+
if (bb->live_in.size == 0)
25802600
return live_out_idx;
25812601

25822602
/* Optimize for common case of small sets */
25832603
if (live_out_idx < 16) {
25842604
/* For small sets, simple linear search is fast enough */
2585-
for (int i = 0; i < bb->live_in_idx; i++) {
2605+
for (int i = 0; i < bb->live_in.size; i++) {
25862606
bool found = false;
2607+
var_t *var = bb->live_in.elements[i];
25872608
for (int j = 0; j < live_out_idx; j++) {
2588-
if (live_out[j] == bb->live_in[i]) {
2609+
if (live_out[j] == var) {
25892610
found = true;
25902611
break;
25912612
}
25922613
}
25932614
if (!found && live_out_idx < MAX_ANALYSIS_STACK_SIZE)
2594-
live_out[live_out_idx++] = bb->live_in[i];
2615+
live_out[live_out_idx++] = var;
25952616
}
25962617
} else {
25972618
/* For larger sets, check bounds and use optimized loop */
2598-
for (int i = 0; i < bb->live_in_idx; i++) {
2619+
for (int i = 0; i < bb->live_in.size; i++) {
25992620
bool found = false;
2600-
var_t *var = bb->live_in[i];
2621+
var_t *var = bb->live_in.elements[i];
26012622
/* Unroll inner loop for better performance */
26022623
int j;
26032624
for (j = 0; j + 3 < live_out_idx; j += 4) {
@@ -2643,9 +2664,8 @@ bool recompute_live_out(basic_block_t *bb)
26432664
}
26442665

26452666
/* Quick check: if sizes differ, sets must be different */
2646-
if (bb->live_out_idx != live_out_idx) {
2647-
memcpy(bb->live_out, live_out, HOST_PTR_SIZE * live_out_idx);
2648-
bb->live_out_idx = live_out_idx;
2667+
if (bb->live_out.size != live_out_idx) {
2668+
var_list_assign_array(&bb->live_out, live_out, live_out_idx);
26492669
return true;
26502670
}
26512671

@@ -2654,31 +2674,29 @@ bool recompute_live_out(basic_block_t *bb)
26542674
if (live_out_idx > 0) {
26552675
/* Quick check first element */
26562676
bool first_found = false;
2657-
for (int j = 0; j < bb->live_out_idx; j++) {
2658-
if (live_out[0] == bb->live_out[j]) {
2677+
for (int j = 0; j < bb->live_out.size; j++) {
2678+
if (live_out[0] == bb->live_out.elements[j]) {
26592679
first_found = true;
26602680
break;
26612681
}
26622682
}
26632683
if (!first_found) {
2664-
memcpy(bb->live_out, live_out, HOST_PTR_SIZE * live_out_idx);
2665-
bb->live_out_idx = live_out_idx;
2684+
var_list_assign_array(&bb->live_out, live_out, live_out_idx);
26662685
return true;
26672686
}
26682687
}
26692688

26702689
/* Full comparison */
26712690
for (int i = 0; i < live_out_idx; i++) {
26722691
int same = 0;
2673-
for (int j = 0; j < bb->live_out_idx; j++) {
2674-
if (live_out[i] == bb->live_out[j]) {
2692+
for (int j = 0; j < bb->live_out.size; j++) {
2693+
if (live_out[i] == bb->live_out.elements[j]) {
26752694
same = 1;
26762695
break;
26772696
}
26782697
}
26792698
if (!same) {
2680-
memcpy(bb->live_out, live_out, HOST_PTR_SIZE * live_out_idx);
2681-
bb->live_out_idx = live_out_idx;
2699+
var_list_assign_array(&bb->live_out, live_out, live_out_idx);
26822700
return true;
26832701
}
26842702
}

0 commit comments

Comments
 (0)