-
Notifications
You must be signed in to change notification settings - Fork 147
Reduced memory usage in live sets #311
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -21,6 +21,43 @@ | |||
| /* Dead store elimination window size */ | ||||
| #define OVERWRITE_WINDOW 3 | ||||
|
|
||||
| void var_list_ensure_capacity(var_list_t *list, int min_capacity) | ||||
| { | ||||
| if (list->capacity >= min_capacity) | ||||
| return; | ||||
|
|
||||
| int new_capacity = list->capacity ? list->capacity : HOST_PTR_SIZE; | ||||
|
|
||||
| while (new_capacity < min_capacity) | ||||
| new_capacity <<= 1; | ||||
|
|
||||
| var_t **new_elements = arena_alloc(BB_ARENA, new_capacity * HOST_PTR_SIZE); | ||||
|
|
||||
| if (list->elements) | ||||
| memcpy(new_elements, list->elements, list->size * HOST_PTR_SIZE); | ||||
|
|
||||
| list->elements = new_elements; | ||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not super familiar with the arena_*() API, but don't we need to call arena_free() on the old pointer?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Got it, thanks for the clarification.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well not really, this is properly handled in Line 1344 in 76e51ef
So no memory leak is happening here.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, but global_release() should only be called when the entire compiler is about to exit? Therefore, when we use arena_alloc() to grow the old list->elements, I thought this was still a leakage until the entire compiler finishes? Even if we don't call global_release(), the operating system should reclaim all used memory when the executable terminates. But perhaps this isn't actually a serious problem because even if we don't immediately reclaim this memory that's no longer in use, the memory usage is still much less than the original static array.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IOW, the arena API thinks this memory is occupied and won't reuse it, but in fact, no pointers are actually referencing it anymore? |
||||
| list->capacity = new_capacity; | ||||
| } | ||||
|
|
||||
| void var_list_add_var(var_list_t *list, var_t *var) | ||||
| { | ||||
| for (int i = 0; i < list->size; i++) { | ||||
| if (list->elements[i] == var) | ||||
| return; | ||||
| } | ||||
|
|
||||
| var_list_ensure_capacity(list, list->size + 1); | ||||
| list->elements[list->size++] = var; | ||||
| } | ||||
|
|
||||
| void var_list_assign_array(var_list_t *list, var_t **data, int count) | ||||
| { | ||||
| var_list_ensure_capacity(list, count); | ||||
| memcpy(list->elements, data, count * HOST_PTR_SIZE); | ||||
| list->size = count; | ||||
| } | ||||
|
|
||||
| /* cfront does not accept structure as an argument, pass pointer */ | ||||
| void bb_forward_traversal(bb_traversal_args_t *args) | ||||
| { | ||||
|
|
@@ -484,26 +521,16 @@ void use_chain_build(void) | |||
|
|
||||
| bool var_check_killed(var_t *var, basic_block_t *bb) | ||||
| { | ||||
| for (int i = 0; i < bb->live_kill_idx; i++) { | ||||
| if (bb->live_kill[i] == var) | ||||
| for (int i = 0; i < bb->live_kill.size; i++) { | ||||
| if (bb->live_kill.elements[i] == var) | ||||
| return true; | ||||
| } | ||||
| return false; | ||||
| } | ||||
|
|
||||
| void bb_add_killed_var(basic_block_t *bb, var_t *var) | ||||
| { | ||||
| bool found = false; | ||||
| for (int i = 0; i < bb->live_kill_idx; i++) { | ||||
| if (bb->live_kill[i] == var) { | ||||
| found = true; | ||||
| break; | ||||
| } | ||||
| } | ||||
| if (found) | ||||
| return; | ||||
|
|
||||
| bb->live_kill[bb->live_kill_idx++] = var; | ||||
| var_list_add_var(&bb->live_kill, var); | ||||
| } | ||||
|
|
||||
| void var_add_killed_bb(var_t *var, basic_block_t *bb) | ||||
|
|
@@ -2475,7 +2502,7 @@ void build_reversed_rpo(void) | |||
| void bb_reset_live_kill_idx(func_t *func, basic_block_t *bb) | ||||
| { | ||||
| UNUSED(func); | ||||
| bb->live_kill_idx = 0; | ||||
| bb->live_kill.size = 0; | ||||
| } | ||||
|
|
||||
| void add_live_gen(basic_block_t *bb, var_t *var); | ||||
|
|
@@ -2486,8 +2513,8 @@ void bb_reset_and_solve_locals(func_t *func, basic_block_t *bb) | |||
| { | ||||
| UNUSED(func); | ||||
|
|
||||
| /* Reset live_kill index */ | ||||
| bb->live_kill_idx = 0; | ||||
| /* Reset live_kill list */ | ||||
| bb->live_kill.size = 0; | ||||
|
|
||||
| /* Solve locals */ | ||||
| int i = 0; | ||||
|
|
@@ -2514,11 +2541,7 @@ void add_live_gen(basic_block_t *bb, var_t *var) | |||
| if (var->is_global) | ||||
| return; | ||||
|
|
||||
| for (int i = 0; i < bb->live_gen_idx; i++) { | ||||
| if (bb->live_gen[i] == var) | ||||
| return; | ||||
| } | ||||
| bb->live_gen[bb->live_gen_idx++] = var; | ||||
| var_list_add_var(&bb->live_gen, var); | ||||
| } | ||||
|
|
||||
| void update_consumed(insn_t *insn, var_t *var) | ||||
|
|
@@ -2553,51 +2576,49 @@ void bb_solve_locals(func_t *func, basic_block_t *bb) | |||
|
|
||||
| void add_live_in(basic_block_t *bb, var_t *var) | ||||
| { | ||||
| for (int i = 0; i < bb->live_in_idx; i++) { | ||||
| if (bb->live_in[i] == var) | ||||
| return; | ||||
| } | ||||
| bb->live_in[bb->live_in_idx++] = var; | ||||
| var_list_add_var(&bb->live_in, var); | ||||
| } | ||||
|
|
||||
| void compute_live_in(basic_block_t *bb) | ||||
| { | ||||
| bb->live_in_idx = 0; | ||||
| bb->live_in.size = 0; | ||||
|
|
||||
| for (int i = 0; i < bb->live_out_idx; i++) { | ||||
| if (var_check_killed(bb->live_out[i], bb)) | ||||
| for (int i = 0; i < bb->live_out.size; i++) { | ||||
| var_t *var = bb->live_out.elements[i]; | ||||
| if (var_check_killed(var, bb)) | ||||
| continue; | ||||
| add_live_in(bb, bb->live_out[i]); | ||||
| add_live_in(bb, var); | ||||
| } | ||||
| for (int i = 0; i < bb->live_gen_idx; i++) | ||||
| add_live_in(bb, bb->live_gen[i]); | ||||
| for (int i = 0; i < bb->live_gen.size; i++) | ||||
| add_live_in(bb, bb->live_gen.elements[i]); | ||||
| } | ||||
|
|
||||
| int merge_live_in(var_t *live_out[], int live_out_idx, basic_block_t *bb) | ||||
| { | ||||
| /* Early exit for empty live_in */ | ||||
| if (bb->live_in_idx == 0) | ||||
| if (bb->live_in.size == 0) | ||||
| return live_out_idx; | ||||
|
|
||||
| /* Optimize for common case of small sets */ | ||||
| if (live_out_idx < 16) { | ||||
| /* For small sets, simple linear search is fast enough */ | ||||
| for (int i = 0; i < bb->live_in_idx; i++) { | ||||
| for (int i = 0; i < bb->live_in.size; i++) { | ||||
| bool found = false; | ||||
| var_t *var = bb->live_in.elements[i]; | ||||
| for (int j = 0; j < live_out_idx; j++) { | ||||
| if (live_out[j] == bb->live_in[i]) { | ||||
| if (live_out[j] == var) { | ||||
| found = true; | ||||
| break; | ||||
| } | ||||
| } | ||||
| if (!found && live_out_idx < MAX_ANALYSIS_STACK_SIZE) | ||||
| live_out[live_out_idx++] = bb->live_in[i]; | ||||
| live_out[live_out_idx++] = var; | ||||
| } | ||||
| } else { | ||||
| /* For larger sets, check bounds and use optimized loop */ | ||||
| for (int i = 0; i < bb->live_in_idx; i++) { | ||||
| for (int i = 0; i < bb->live_in.size; i++) { | ||||
| bool found = false; | ||||
| var_t *var = bb->live_in[i]; | ||||
| var_t *var = bb->live_in.elements[i]; | ||||
| /* Unroll inner loop for better performance */ | ||||
| int j; | ||||
| for (j = 0; j + 3 < live_out_idx; j += 4) { | ||||
|
|
@@ -2643,9 +2664,8 @@ bool recompute_live_out(basic_block_t *bb) | |||
| } | ||||
|
|
||||
| /* Quick check: if sizes differ, sets must be different */ | ||||
| if (bb->live_out_idx != live_out_idx) { | ||||
| memcpy(bb->live_out, live_out, HOST_PTR_SIZE * live_out_idx); | ||||
| bb->live_out_idx = live_out_idx; | ||||
| if (bb->live_out.size != live_out_idx) { | ||||
| var_list_assign_array(&bb->live_out, live_out, live_out_idx); | ||||
| return true; | ||||
| } | ||||
|
|
||||
|
|
@@ -2654,31 +2674,29 @@ bool recompute_live_out(basic_block_t *bb) | |||
| if (live_out_idx > 0) { | ||||
| /* Quick check first element */ | ||||
| bool first_found = false; | ||||
| for (int j = 0; j < bb->live_out_idx; j++) { | ||||
| if (live_out[0] == bb->live_out[j]) { | ||||
| for (int j = 0; j < bb->live_out.size; j++) { | ||||
| if (live_out[0] == bb->live_out.elements[j]) { | ||||
| first_found = true; | ||||
| break; | ||||
| } | ||||
| } | ||||
| if (!first_found) { | ||||
| memcpy(bb->live_out, live_out, HOST_PTR_SIZE * live_out_idx); | ||||
| bb->live_out_idx = live_out_idx; | ||||
| var_list_assign_array(&bb->live_out, live_out, live_out_idx); | ||||
| return true; | ||||
| } | ||||
| } | ||||
|
|
||||
| /* Full comparison */ | ||||
| for (int i = 0; i < live_out_idx; i++) { | ||||
| int same = 0; | ||||
| for (int j = 0; j < bb->live_out_idx; j++) { | ||||
| if (live_out[i] == bb->live_out[j]) { | ||||
| for (int j = 0; j < bb->live_out.size; j++) { | ||||
| if (live_out[i] == bb->live_out.elements[j]) { | ||||
| same = 1; | ||||
| break; | ||||
| } | ||||
| } | ||||
| if (!same) { | ||||
| memcpy(bb->live_out, live_out, HOST_PTR_SIZE * live_out_idx); | ||||
| bb->live_out_idx = live_out_idx; | ||||
| var_list_assign_array(&bb->live_out, live_out, live_out_idx); | ||||
| return true; | ||||
| } | ||||
| } | ||||
|
|
||||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since the new code might introduce several additional memory copy operations, I imagine it will be slower. I'm not sure what the performance impact will be, but I guess the 75% memory saving is very likely worth that cost.
However, this is a trade-off. Reading the commit message alone gives the impression that this is a pure improvement without any drawbacks.
I'm wondering if this performance difference has been measured?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I measured the performance difference and added the results to the
commit message and PR description.