Skip to content

Commit 382591b

Browse files
author
icgmilk
committed
Reduced memory usage in live sets
Live sets in shecc are implemented using arrays of pointers, which preallocate hundreds of unused slots, leading to significant memory waste. This patch replaces the arrays of pointers with arena-backed vectors, reducing memory usage substantially. For example, compiling "src/main.c" previously required ~1.2 GB of memory. With this patch, memory usage is reduced to ~305 MB, a 75.4% improvement. Changes include: - Replace live sets with "var_list_t" for dynamic resizing. - Add helper routines in "ssa.c" for managing "var_list_t" instances. - Update related logic in "reg-alloc.c" and "ssa.c".
1 parent 1169c76 commit 382591b

File tree

3 files changed

+75
-59
lines changed

3 files changed

+75
-59
lines changed

src/defs.h

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -535,14 +535,10 @@ struct basic_block {
535535
struct basic_block *r_idom;
536536
struct basic_block *rpo_next;
537537
struct basic_block *rpo_r_next;
538-
var_t *live_gen[MAX_ANALYSIS_STACK_SIZE];
539-
int live_gen_idx;
540-
var_t *live_kill[MAX_ANALYSIS_STACK_SIZE];
541-
int live_kill_idx;
542-
var_t *live_in[MAX_ANALYSIS_STACK_SIZE];
543-
int live_in_idx;
544-
var_t *live_out[MAX_ANALYSIS_STACK_SIZE];
545-
int live_out_idx;
538+
var_list_t live_gen;
539+
var_list_t live_kill;
540+
var_list_t live_in;
541+
var_list_t live_out;
546542
int rpo;
547543
int rpo_r;
548544
struct basic_block *DF[64];

src/reg-alloc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ int align_size(int i)
4747

4848
bool check_live_out(basic_block_t *bb, var_t *var)
4949
{
50-
for (int i = 0; i < bb->live_out_idx; i++) {
51-
if (bb->live_out[i] == var)
50+
for (int i = 0; i < bb->live_out.size; i++) {
51+
if (bb->live_out.elements[i] == var)
5252
return true;
5353
}
5454
return false;

src/ssa.c

Lines changed: 69 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,45 @@
2121
/* Dead store elimination window size */
2222
#define OVERWRITE_WINDOW 3
2323

24+
/* Make sure 'list' has room for at least 'min_capacity' element pointers.
 *
 * When the current capacity is already sufficient this is a no-op. Otherwise
 * the capacity starts from the current value (or 8 for a list with zero
 * capacity) and is doubled until it reaches 'min_capacity'. A new buffer is
 * taken from BB_ARENA, the first 'list->size' pointers are copied over, and
 * the list is switched to the new buffer. The previous buffer is not released
 * by this function.
 */
void var_list_ensure_capacity(var_list_t *list, int min_capacity)
{
    if (min_capacity <= list->capacity)
        return;

    /* Pick the starting point for the doubling sequence. */
    int grown = list->capacity;
    if (!grown)
        grown = 8;

    while (grown < min_capacity)
        grown <<= 1;

    var_t **storage = arena_alloc(BB_ARENA, grown * HOST_PTR_SIZE);

    /* Carry over the existing contents, if there is an old buffer at all. */
    if (list->elements)
        memcpy(storage, list->elements, list->size * HOST_PTR_SIZE);

    list->capacity = grown;
    list->elements = storage;
}
44+
45+
/* Append 'var' to 'list' unless the same pointer is already stored.
 *
 * Membership is decided by pointer identity via a linear scan, so the list
 * never holds the same var_t pointer twice. Storage is grown through
 * var_list_ensure_capacity() before the new element is written.
 */
void var_list_add_var(var_list_t *list, var_t *var)
{
    /* Already present? Then there is nothing to do. */
    int idx = 0;
    while (idx < list->size) {
        if (list->elements[idx] == var)
            return;
        idx++;
    }

    var_list_ensure_capacity(list, list->size + 1);
    list->elements[list->size] = var;
    list->size++;
}
55+
56+
/* Replace the contents of 'list' with the first 'count' pointers of 'data'.
 *
 * The list is grown through var_list_ensure_capacity() when needed, the
 * pointers are copied verbatim (no duplicate filtering), and the list size
 * becomes 'count'. A 'count' of zero or less simply empties the list.
 */
void var_list_assign_array(var_list_t *list, var_t **data, int count)
{
    /* Nothing to copy: just empty the list. Returning early keeps memcpy()
     * from being handed a NULL destination (possible when the list has no
     * buffer yet, since var_list_ensure_capacity() does not allocate for a
     * request of 0), which is undefined even for a zero-byte copy. It also
     * stops a negative count from turning into a huge copy length.
     */
    if (count <= 0) {
        list->size = 0;
        return;
    }

    var_list_ensure_capacity(list, count);
    memcpy(list->elements, data, count * HOST_PTR_SIZE);
    list->size = count;
}
62+
2463
/* cfront does not accept structure as an argument, pass pointer */
2564
void bb_forward_traversal(bb_traversal_args_t *args)
2665
{
@@ -484,26 +523,16 @@ void use_chain_build(void)
484523

485524
bool var_check_killed(var_t *var, basic_block_t *bb)
486525
{
487-
for (int i = 0; i < bb->live_kill_idx; i++) {
488-
if (bb->live_kill[i] == var)
526+
for (int i = 0; i < bb->live_kill.size; i++) {
527+
if (bb->live_kill.elements[i] == var)
489528
return true;
490529
}
491530
return false;
492531
}
493532

494533
void bb_add_killed_var(basic_block_t *bb, var_t *var)
495534
{
496-
bool found = false;
497-
for (int i = 0; i < bb->live_kill_idx; i++) {
498-
if (bb->live_kill[i] == var) {
499-
found = true;
500-
break;
501-
}
502-
}
503-
if (found)
504-
return;
505-
506-
bb->live_kill[bb->live_kill_idx++] = var;
535+
var_list_add_var(&bb->live_kill, var);
507536
}
508537

509538
void var_add_killed_bb(var_t *var, basic_block_t *bb)
@@ -2475,7 +2504,7 @@ void build_reversed_rpo(void)
24752504
void bb_reset_live_kill_idx(func_t *func, basic_block_t *bb)
24762505
{
24772506
UNUSED(func);
2478-
bb->live_kill_idx = 0;
2507+
bb->live_kill.size = 0;
24792508
}
24802509

24812510
void add_live_gen(basic_block_t *bb, var_t *var);
@@ -2486,8 +2515,8 @@ void bb_reset_and_solve_locals(func_t *func, basic_block_t *bb)
24862515
{
24872516
UNUSED(func);
24882517

2489-
/* Reset live_kill index */
2490-
bb->live_kill_idx = 0;
2518+
/* Reset live_kill list */
2519+
bb->live_kill.size = 0;
24912520

24922521
/* Solve locals */
24932522
int i = 0;
@@ -2514,11 +2543,7 @@ void add_live_gen(basic_block_t *bb, var_t *var)
25142543
if (var->is_global)
25152544
return;
25162545

2517-
for (int i = 0; i < bb->live_gen_idx; i++) {
2518-
if (bb->live_gen[i] == var)
2519-
return;
2520-
}
2521-
bb->live_gen[bb->live_gen_idx++] = var;
2546+
var_list_add_var(&bb->live_gen, var);
25222547
}
25232548

25242549
void update_consumed(insn_t *insn, var_t *var)
@@ -2553,51 +2578,49 @@ void bb_solve_locals(func_t *func, basic_block_t *bb)
25532578

25542579
void add_live_in(basic_block_t *bb, var_t *var)
25552580
{
2556-
for (int i = 0; i < bb->live_in_idx; i++) {
2557-
if (bb->live_in[i] == var)
2558-
return;
2559-
}
2560-
bb->live_in[bb->live_in_idx++] = var;
2581+
var_list_add_var(&bb->live_in, var);
25612582
}
25622583

25632584
void compute_live_in(basic_block_t *bb)
25642585
{
2565-
bb->live_in_idx = 0;
2586+
bb->live_in.size = 0;
25662587

2567-
for (int i = 0; i < bb->live_out_idx; i++) {
2568-
if (var_check_killed(bb->live_out[i], bb))
2588+
for (int i = 0; i < bb->live_out.size; i++) {
2589+
var_t *var = bb->live_out.elements[i];
2590+
if (var_check_killed(var, bb))
25692591
continue;
2570-
add_live_in(bb, bb->live_out[i]);
2592+
add_live_in(bb, var);
25712593
}
2572-
for (int i = 0; i < bb->live_gen_idx; i++)
2573-
add_live_in(bb, bb->live_gen[i]);
2594+
for (int i = 0; i < bb->live_gen.size; i++)
2595+
add_live_in(bb, bb->live_gen.elements[i]);
25742596
}
25752597

25762598
int merge_live_in(var_t *live_out[], int live_out_idx, basic_block_t *bb)
25772599
{
25782600
/* Early exit for empty live_in */
2579-
if (bb->live_in_idx == 0)
2601+
if (bb->live_in.size == 0)
25802602
return live_out_idx;
25812603

25822604
/* Optimize for common case of small sets */
25832605
if (live_out_idx < 16) {
25842606
/* For small sets, simple linear search is fast enough */
2585-
for (int i = 0; i < bb->live_in_idx; i++) {
2607+
for (int i = 0; i < bb->live_in.size; i++) {
25862608
bool found = false;
2609+
var_t *var = bb->live_in.elements[i];
25872610
for (int j = 0; j < live_out_idx; j++) {
2588-
if (live_out[j] == bb->live_in[i]) {
2611+
if (live_out[j] == var) {
25892612
found = true;
25902613
break;
25912614
}
25922615
}
25932616
if (!found && live_out_idx < MAX_ANALYSIS_STACK_SIZE)
2594-
live_out[live_out_idx++] = bb->live_in[i];
2617+
live_out[live_out_idx++] = var;
25952618
}
25962619
} else {
25972620
/* For larger sets, check bounds and use optimized loop */
2598-
for (int i = 0; i < bb->live_in_idx; i++) {
2621+
for (int i = 0; i < bb->live_in.size; i++) {
25992622
bool found = false;
2600-
var_t *var = bb->live_in[i];
2623+
var_t *var = bb->live_in.elements[i];
26012624
/* Unroll inner loop for better performance */
26022625
int j;
26032626
for (j = 0; j + 3 < live_out_idx; j += 4) {
@@ -2643,9 +2666,8 @@ bool recompute_live_out(basic_block_t *bb)
26432666
}
26442667

26452668
/* Quick check: if sizes differ, sets must be different */
2646-
if (bb->live_out_idx != live_out_idx) {
2647-
memcpy(bb->live_out, live_out, HOST_PTR_SIZE * live_out_idx);
2648-
bb->live_out_idx = live_out_idx;
2669+
if (bb->live_out.size != live_out_idx) {
2670+
var_list_assign_array(&bb->live_out, live_out, live_out_idx);
26492671
return true;
26502672
}
26512673

@@ -2654,31 +2676,29 @@ bool recompute_live_out(basic_block_t *bb)
26542676
if (live_out_idx > 0) {
26552677
/* Quick check first element */
26562678
bool first_found = false;
2657-
for (int j = 0; j < bb->live_out_idx; j++) {
2658-
if (live_out[0] == bb->live_out[j]) {
2679+
for (int j = 0; j < bb->live_out.size; j++) {
2680+
if (live_out[0] == bb->live_out.elements[j]) {
26592681
first_found = true;
26602682
break;
26612683
}
26622684
}
26632685
if (!first_found) {
2664-
memcpy(bb->live_out, live_out, HOST_PTR_SIZE * live_out_idx);
2665-
bb->live_out_idx = live_out_idx;
2686+
var_list_assign_array(&bb->live_out, live_out, live_out_idx);
26662687
return true;
26672688
}
26682689
}
26692690

26702691
/* Full comparison */
26712692
for (int i = 0; i < live_out_idx; i++) {
26722693
int same = 0;
2673-
for (int j = 0; j < bb->live_out_idx; j++) {
2674-
if (live_out[i] == bb->live_out[j]) {
2694+
for (int j = 0; j < bb->live_out.size; j++) {
2695+
if (live_out[i] == bb->live_out.elements[j]) {
26752696
same = 1;
26762697
break;
26772698
}
26782699
}
26792700
if (!same) {
2680-
memcpy(bb->live_out, live_out, HOST_PTR_SIZE * live_out_idx);
2681-
bb->live_out_idx = live_out_idx;
2701+
var_list_assign_array(&bb->live_out, live_out, live_out_idx);
26822702
return true;
26832703
}
26842704
}

0 commit comments

Comments
 (0)