Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions src/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -535,14 +535,10 @@ struct basic_block {
struct basic_block *r_idom;
struct basic_block *rpo_next;
struct basic_block *rpo_r_next;
var_t *live_gen[MAX_ANALYSIS_STACK_SIZE];
int live_gen_idx;
var_t *live_kill[MAX_ANALYSIS_STACK_SIZE];
int live_kill_idx;
var_t *live_in[MAX_ANALYSIS_STACK_SIZE];
int live_in_idx;
var_t *live_out[MAX_ANALYSIS_STACK_SIZE];
int live_out_idx;
var_list_t live_gen;
var_list_t live_kill;
var_list_t live_in;
var_list_t live_out;
int rpo;
int rpo_r;
struct basic_block *DF[64];
Expand Down
4 changes: 2 additions & 2 deletions src/reg-alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ int align_size(int i)

bool check_live_out(basic_block_t *bb, var_t *var)
{
for (int i = 0; i < bb->live_out_idx; i++) {
if (bb->live_out[i] == var)
for (int i = 0; i < bb->live_out.size; i++) {
if (bb->live_out.elements[i] == var)
return true;
}
return false;
Expand Down
116 changes: 67 additions & 49 deletions src/ssa.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,43 @@
/* Dead store elimination window size */
#define OVERWRITE_WINDOW 3

void var_list_ensure_capacity(var_list_t *list, int min_capacity)
{
if (list->capacity >= min_capacity)
return;

int new_capacity = list->capacity ? list->capacity : HOST_PTR_SIZE;

while (new_capacity < min_capacity)
new_capacity <<= 1;

var_t **new_elements = arena_alloc(BB_ARENA, new_capacity * HOST_PTR_SIZE);

if (list->elements)
memcpy(new_elements, list->elements, list->size * HOST_PTR_SIZE);
Copy link
Collaborator

@visitorckw visitorckw Oct 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the new code might introduce several additional memory copy operations, I imagine it will be slower. I'm not sure what the performance impact will be, but I guess the 75% memory saving is very likely worth that cost.

However, this is a trade-off. Reading the commit message alone gives the impression that this is a pure improvement without any drawbacks.

I'm wondering if this performance difference has been measured?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I measured the performance difference and added the results to the
commit message and PR description.


list->elements = new_elements;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not super familiar with the arena_*() API, but don't we need to call arena_free() on the old pointer?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

arena_free() is used to free the whole arena, there's currently no freeing function specifically for allocated objects other than the internal arena block and arena itself.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, thanks for the clarification.
So IIUC, this will actually cause a memory leak that the current API cannot resolve, right?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well not really, this is properly handled in global_release() already:

arena_free(BB_ARENA);

So no memory leak is happening here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, but global_release() should only be called when the entire compiler is about to exit?

Therefore, when we use arena_alloc() to grow the old list->elements, I thought this was still a leakage until the entire compiler finishes?

Even if we don't call global_release(), the operating system should reclaim all used memory when the executable terminates.

But perhaps this isn't actually a serious problem because even if we don't immediately reclaim this memory that's no longer in use, the memory usage is still much less than the original static array.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IOW, the arena API thinks this memory is occupied and won't reuse it, but in fact, no pointers are actually referencing it anymore?

list->capacity = new_capacity;
}

void var_list_add_var(var_list_t *list, var_t *var)
{
for (int i = 0; i < list->size; i++) {
if (list->elements[i] == var)
return;
}

var_list_ensure_capacity(list, list->size + 1);
list->elements[list->size++] = var;
}

void var_list_assign_array(var_list_t *list, var_t **data, int count)
{
var_list_ensure_capacity(list, count);
memcpy(list->elements, data, count * HOST_PTR_SIZE);
list->size = count;
}

/* cfront does not accept structure as an argument, pass pointer */
void bb_forward_traversal(bb_traversal_args_t *args)
{
Expand Down Expand Up @@ -484,26 +521,16 @@ void use_chain_build(void)

bool var_check_killed(var_t *var, basic_block_t *bb)
{
for (int i = 0; i < bb->live_kill_idx; i++) {
if (bb->live_kill[i] == var)
for (int i = 0; i < bb->live_kill.size; i++) {
if (bb->live_kill.elements[i] == var)
return true;
}
return false;
}

void bb_add_killed_var(basic_block_t *bb, var_t *var)
{
bool found = false;
for (int i = 0; i < bb->live_kill_idx; i++) {
if (bb->live_kill[i] == var) {
found = true;
break;
}
}
if (found)
return;

bb->live_kill[bb->live_kill_idx++] = var;
var_list_add_var(&bb->live_kill, var);
}

void var_add_killed_bb(var_t *var, basic_block_t *bb)
Expand Down Expand Up @@ -2475,7 +2502,7 @@ void build_reversed_rpo(void)
void bb_reset_live_kill_idx(func_t *func, basic_block_t *bb)
{
UNUSED(func);
bb->live_kill_idx = 0;
bb->live_kill.size = 0;
}

void add_live_gen(basic_block_t *bb, var_t *var);
Expand All @@ -2486,8 +2513,8 @@ void bb_reset_and_solve_locals(func_t *func, basic_block_t *bb)
{
UNUSED(func);

/* Reset live_kill index */
bb->live_kill_idx = 0;
/* Reset live_kill list */
bb->live_kill.size = 0;

/* Solve locals */
int i = 0;
Expand All @@ -2514,11 +2541,7 @@ void add_live_gen(basic_block_t *bb, var_t *var)
if (var->is_global)
return;

for (int i = 0; i < bb->live_gen_idx; i++) {
if (bb->live_gen[i] == var)
return;
}
bb->live_gen[bb->live_gen_idx++] = var;
var_list_add_var(&bb->live_gen, var);
}

void update_consumed(insn_t *insn, var_t *var)
Expand Down Expand Up @@ -2553,51 +2576,49 @@ void bb_solve_locals(func_t *func, basic_block_t *bb)

void add_live_in(basic_block_t *bb, var_t *var)
{
for (int i = 0; i < bb->live_in_idx; i++) {
if (bb->live_in[i] == var)
return;
}
bb->live_in[bb->live_in_idx++] = var;
var_list_add_var(&bb->live_in, var);
}

void compute_live_in(basic_block_t *bb)
{
bb->live_in_idx = 0;
bb->live_in.size = 0;

for (int i = 0; i < bb->live_out_idx; i++) {
if (var_check_killed(bb->live_out[i], bb))
for (int i = 0; i < bb->live_out.size; i++) {
var_t *var = bb->live_out.elements[i];
if (var_check_killed(var, bb))
continue;
add_live_in(bb, bb->live_out[i]);
add_live_in(bb, var);
}
for (int i = 0; i < bb->live_gen_idx; i++)
add_live_in(bb, bb->live_gen[i]);
for (int i = 0; i < bb->live_gen.size; i++)
add_live_in(bb, bb->live_gen.elements[i]);
}

int merge_live_in(var_t *live_out[], int live_out_idx, basic_block_t *bb)
{
/* Early exit for empty live_in */
if (bb->live_in_idx == 0)
if (bb->live_in.size == 0)
return live_out_idx;

/* Optimize for common case of small sets */
if (live_out_idx < 16) {
/* For small sets, simple linear search is fast enough */
for (int i = 0; i < bb->live_in_idx; i++) {
for (int i = 0; i < bb->live_in.size; i++) {
bool found = false;
var_t *var = bb->live_in.elements[i];
for (int j = 0; j < live_out_idx; j++) {
if (live_out[j] == bb->live_in[i]) {
if (live_out[j] == var) {
found = true;
break;
}
}
if (!found && live_out_idx < MAX_ANALYSIS_STACK_SIZE)
live_out[live_out_idx++] = bb->live_in[i];
live_out[live_out_idx++] = var;
}
} else {
/* For larger sets, check bounds and use optimized loop */
for (int i = 0; i < bb->live_in_idx; i++) {
for (int i = 0; i < bb->live_in.size; i++) {
bool found = false;
var_t *var = bb->live_in[i];
var_t *var = bb->live_in.elements[i];
/* Unroll inner loop for better performance */
int j;
for (j = 0; j + 3 < live_out_idx; j += 4) {
Expand Down Expand Up @@ -2643,9 +2664,8 @@ bool recompute_live_out(basic_block_t *bb)
}

/* Quick check: if sizes differ, sets must be different */
if (bb->live_out_idx != live_out_idx) {
memcpy(bb->live_out, live_out, HOST_PTR_SIZE * live_out_idx);
bb->live_out_idx = live_out_idx;
if (bb->live_out.size != live_out_idx) {
var_list_assign_array(&bb->live_out, live_out, live_out_idx);
return true;
}

Expand All @@ -2654,31 +2674,29 @@ bool recompute_live_out(basic_block_t *bb)
if (live_out_idx > 0) {
/* Quick check first element */
bool first_found = false;
for (int j = 0; j < bb->live_out_idx; j++) {
if (live_out[0] == bb->live_out[j]) {
for (int j = 0; j < bb->live_out.size; j++) {
if (live_out[0] == bb->live_out.elements[j]) {
first_found = true;
break;
}
}
if (!first_found) {
memcpy(bb->live_out, live_out, HOST_PTR_SIZE * live_out_idx);
bb->live_out_idx = live_out_idx;
var_list_assign_array(&bb->live_out, live_out, live_out_idx);
return true;
}
}

/* Full comparison */
for (int i = 0; i < live_out_idx; i++) {
int same = 0;
for (int j = 0; j < bb->live_out_idx; j++) {
if (live_out[i] == bb->live_out[j]) {
for (int j = 0; j < bb->live_out.size; j++) {
if (live_out[i] == bb->live_out.elements[j]) {
same = 1;
break;
}
}
if (!same) {
memcpy(bb->live_out, live_out, HOST_PTR_SIZE * live_out_idx);
bb->live_out_idx = live_out_idx;
var_list_assign_array(&bb->live_out, live_out, live_out_idx);
return true;
}
}
Expand Down