Skip to content

Commit d5dac27

Browse files
committed
Extended SSA optimization with multi-insn analysis
This transforms SSA optimizations from single-instruction to multi-instruction analysis for improved code generation:

Extended Load-After-Store Forwarding:
- Analyze up to 10 instructions backwards for forwarding opportunities
- Validate safety across intervening instructions and function calls
- Eliminate redundant memory operations across instruction sequences

Full Basic Block Load Elimination:
- Scan entire basic blocks for duplicate loads
- Reuse first load result when no intervening stores detected
- Significantly reduce memory traffic

Comprehensive Algebraic Simplifications:
- Complete set of self-operations (div, mod, all comparisons)
- Full identity operations (x+0, x*1, x&-1, etc.)
- Handle constants in both operand positions
- Transform operations to simpler forms (0-x → -x, x*-1 → -x)
1 parent b57f072 commit d5dac27

File tree

1 file changed

+340
-30
lines changed

1 file changed

+340
-30
lines changed

src/ssa.c

Lines changed: 340 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2397,54 +2397,364 @@ void optimize(void)
23972397
}
23982398
}
23992399

2400+
/* Extended load-after-store forwarding with multi-instruction
2401+
* analysis */
2402+
/* Search backwards for any store to the same location */
2403+
if (insn->opcode == OP_load && insn->rs1) {
2404+
insn_t *search = insn->prev;
2405+
int search_limit = 10; /* Look back up to 10 instructions */
2406+
2407+
while (search && search_limit > 0) {
2408+
/* Found a store to the same location */
2409+
if ((search->opcode == OP_store ||
2410+
search->opcode == OP_global_store) &&
2411+
search->rd == insn->rs1 && search->rs1) {
2412+
/* Check if the stored location is not modified
2413+
* between store and load */
2414+
bool safe_to_forward = true;
2415+
insn_t *check = search->next;
2416+
while (check && check != insn) {
2417+
/* Check for any instruction that might modify
2418+
* the location */
2419+
if (check->rd == insn->rs1 &&
2420+
(check->opcode == OP_store ||
2421+
check->opcode == OP_global_store ||
2422+
check->opcode == OP_write)) {
2423+
safe_to_forward = false;
2424+
break;
2425+
}
2426+
/* Check for function calls that might have side
2427+
* effects */
2428+
if (check->opcode == OP_call ||
2429+
check->opcode == OP_indirect) {
2430+
safe_to_forward = false;
2431+
break;
2432+
}
2433+
check = check->next;
2434+
}
2435+
2436+
if (safe_to_forward) {
2437+
/* Forward the stored value */
2438+
insn->opcode = OP_assign;
2439+
insn->rs1 = search->rs1;
2440+
insn->rs2 = NULL;
2441+
break;
2442+
}
2443+
}
2444+
2445+
/* Stop if we hit a call or branch */
2446+
if (search->opcode == OP_call ||
2447+
search->opcode == OP_branch ||
2448+
search->opcode == OP_jump ||
2449+
search->opcode == OP_indirect) {
2450+
break;
2451+
}
2452+
2453+
search = search->prev;
2454+
search_limit--;
2455+
}
2456+
}
2457+
2458+
/* Redundant load elimination */
2459+
/* If we load from the same location multiple times, reuse the
2460+
* first load
2461+
*/
2462+
if (insn->opcode == OP_load && insn->rs1 && insn->rd) {
2463+
insn_t *search = bb->insn_list.head;
2464+
2465+
while (search && search != insn) {
2466+
/* Found an earlier load from the same location */
2467+
if (search->opcode == OP_load &&
2468+
search->rs1 == insn->rs1 && search->rd) {
2469+
/* Check if the location wasn't modified between
2470+
* loads */
2471+
bool safe_to_reuse = true;
2472+
insn_t *check = search->next;
2473+
2474+
while (check && check != insn) {
2475+
if ((check->opcode == OP_store ||
2476+
check->opcode == OP_global_store ||
2477+
check->opcode == OP_write) &&
2478+
check->rd == insn->rs1) {
2479+
safe_to_reuse = false;
2480+
break;
2481+
}
2482+
/* Function calls might modify memory */
2483+
if (check->opcode == OP_call ||
2484+
check->opcode == OP_indirect) {
2485+
safe_to_reuse = false;
2486+
break;
2487+
}
2488+
check = check->next;
2489+
}
2490+
2491+
if (safe_to_reuse) {
2492+
/* Replace load with assignment from previous
2493+
* load */
2494+
insn->opcode = OP_assign;
2495+
insn->rs1 = search->rd;
2496+
insn->rs2 = NULL;
2497+
break;
2498+
}
2499+
}
2500+
search = search->next;
2501+
}
2502+
}
2503+
2504+
/* Self-operation optimizations at SSA level */
2505+
/* These patterns must be handled at SSA level for correct
2506+
* self-hosting despite some duplication with peephole optimizer
2507+
*/
2508+
if (insn->rs1 && insn->rs2 && insn->rs1 == insn->rs2) {
2509+
/* x - x = 0 */
2510+
if (insn->opcode == OP_sub && insn->rd) {
2511+
insn->opcode = OP_load_constant;
2512+
insn->rd->is_const = true;
2513+
insn->rd->init_val = 0;
2514+
insn->rs1 = NULL;
2515+
insn->rs2 = NULL;
2516+
}
2517+
/* x ^ x = 0 */
2518+
else if (insn->opcode == OP_bit_xor && insn->rd) {
2519+
insn->opcode = OP_load_constant;
2520+
insn->rd->is_const = true;
2521+
insn->rd->init_val = 0;
2522+
insn->rs1 = NULL;
2523+
insn->rs2 = NULL;
2524+
}
2525+
/* x & x = x */
2526+
else if (insn->opcode == OP_bit_and && insn->rd) {
2527+
insn->opcode = OP_assign;
2528+
insn->rs2 = NULL;
2529+
}
2530+
/* x | x = x */
2531+
else if (insn->opcode == OP_bit_or && insn->rd) {
2532+
insn->opcode = OP_assign;
2533+
insn->rs2 = NULL;
2534+
}
2535+
/* x / x = 1 (if x != 0) */
2536+
else if (insn->opcode == OP_div && insn->rd) {
2537+
insn->opcode = OP_load_constant;
2538+
insn->rd->is_const = true;
2539+
insn->rd->init_val = 1;
2540+
insn->rs1 = NULL;
2541+
insn->rs2 = NULL;
2542+
}
2543+
/* x % x = 0 */
2544+
else if (insn->opcode == OP_mod && insn->rd) {
2545+
insn->opcode = OP_load_constant;
2546+
insn->rd->is_const = true;
2547+
insn->rd->init_val = 0;
2548+
insn->rs1 = NULL;
2549+
insn->rs2 = NULL;
2550+
}
2551+
/* x == x = 1 */
2552+
else if (insn->opcode == OP_eq && insn->rd) {
2553+
insn->opcode = OP_load_constant;
2554+
insn->rd->is_const = true;
2555+
insn->rd->init_val = 1;
2556+
insn->rs1 = NULL;
2557+
insn->rs2 = NULL;
2558+
}
2559+
/* x != x = 0 */
2560+
else if (insn->opcode == OP_neq && insn->rd) {
2561+
insn->opcode = OP_load_constant;
2562+
insn->rd->is_const = true;
2563+
insn->rd->init_val = 0;
2564+
insn->rs1 = NULL;
2565+
insn->rs2 = NULL;
2566+
}
2567+
/* x < x = 0, x > x = 0 */
2568+
else if ((insn->opcode == OP_lt || insn->opcode == OP_gt) &&
2569+
insn->rd) {
2570+
insn->opcode = OP_load_constant;
2571+
insn->rd->is_const = true;
2572+
insn->rd->init_val = 0;
2573+
insn->rs1 = NULL;
2574+
insn->rs2 = NULL;
2575+
}
2576+
/* x <= x = 1, x >= x = 1 */
2577+
else if ((insn->opcode == OP_leq ||
2578+
insn->opcode == OP_geq) &&
2579+
insn->rd) {
2580+
insn->opcode = OP_load_constant;
2581+
insn->rd->is_const = true;
2582+
insn->rd->init_val = 1;
2583+
insn->rs1 = NULL;
2584+
insn->rs2 = NULL;
2585+
}
2586+
}
2587+
2588+
/* Comprehensive algebraic simplifications with identity
2589+
* operations */
2590+
if (insn->rs2 && insn->rs2->is_const && insn->rd) {
2591+
int val = insn->rs2->init_val;
2592+
2593+
/* x + 0 = x, x - 0 = x, x | 0 = x, x ^ 0 = x */
2594+
if (val == 0) {
2595+
if (insn->opcode == OP_add || insn->opcode == OP_sub ||
2596+
insn->opcode == OP_bit_or ||
2597+
insn->opcode == OP_bit_xor) {
2598+
insn->opcode = OP_assign;
2599+
insn->rs2 = NULL;
2600+
}
2601+
/* x * 0 = 0, x & 0 = 0 */
2602+
else if (insn->opcode == OP_mul ||
2603+
insn->opcode == OP_bit_and) {
2604+
insn->opcode = OP_load_constant;
2605+
insn->rd->is_const = true;
2606+
insn->rd->init_val = 0;
2607+
insn->rs1 = NULL;
2608+
insn->rs2 = NULL;
2609+
}
2610+
/* x << 0 = x, x >> 0 = x */
2611+
else if (insn->opcode == OP_lshift ||
2612+
insn->opcode == OP_rshift) {
2613+
insn->opcode = OP_assign;
2614+
insn->rs2 = NULL;
2615+
}
2616+
}
2617+
/* x * 1 = x, x / 1 = x */
2618+
else if (val == 1) {
2619+
if (insn->opcode == OP_mul || insn->opcode == OP_div) {
2620+
insn->opcode = OP_assign;
2621+
insn->rs2 = NULL;
2622+
}
2623+
}
2624+
/* x & -1 = x (all bits set) */
2625+
else if (val == -1) {
2626+
if (insn->opcode == OP_bit_and) {
2627+
insn->opcode = OP_assign;
2628+
insn->rs2 = NULL;
2629+
}
2630+
/* x | -1 = -1 */
2631+
else if (insn->opcode == OP_bit_or) {
2632+
insn->opcode = OP_load_constant;
2633+
insn->rd->is_const = true;
2634+
insn->rd->init_val = -1;
2635+
insn->rs1 = NULL;
2636+
insn->rs2 = NULL;
2637+
}
2638+
}
2639+
/* x * -1 = -x */
2640+
else if (val == -1 && insn->opcode == OP_mul) {
2641+
insn->opcode = OP_negate;
2642+
insn->rs2 = NULL;
2643+
}
2644+
}
2645+
2646+
/* Simplifications with rs1 constant */
2647+
if (insn->rs1 && insn->rs1->is_const && insn->rd) {
2648+
int val = insn->rs1->init_val;
2649+
2650+
/* 0 + x = x, 0 | x = x, 0 ^ x = x */
2651+
if (val == 0) {
2652+
if (insn->opcode == OP_add ||
2653+
insn->opcode == OP_bit_or ||
2654+
insn->opcode == OP_bit_xor) {
2655+
insn->opcode = OP_assign;
2656+
insn->rs1 = insn->rs2;
2657+
insn->rs2 = NULL;
2658+
}
2659+
/* 0 * x = 0, 0 & x = 0, 0 / x = 0 */
2660+
else if (insn->opcode == OP_mul ||
2661+
insn->opcode == OP_bit_and ||
2662+
insn->opcode == OP_div) {
2663+
insn->opcode = OP_load_constant;
2664+
insn->rd->is_const = true;
2665+
insn->rd->init_val = 0;
2666+
insn->rs1 = NULL;
2667+
insn->rs2 = NULL;
2668+
}
2669+
/* 0 - x = -x */
2670+
else if (insn->opcode == OP_sub) {
2671+
insn->opcode = OP_negate;
2672+
insn->rs1 = insn->rs2;
2673+
insn->rs2 = NULL;
2674+
}
2675+
}
2676+
/* 1 * x = x */
2677+
else if (val == 1 && insn->opcode == OP_mul) {
2678+
insn->opcode = OP_assign;
2679+
insn->rs1 = insn->rs2;
2680+
insn->rs2 = NULL;
2681+
}
2682+
/* -1 & x = x */
2683+
else if (val == -1 && insn->opcode == OP_bit_and) {
2684+
insn->opcode = OP_assign;
2685+
insn->rs1 = insn->rs2;
2686+
insn->rs2 = NULL;
2687+
}
2688+
}
2689+
24002690
/* Phi node optimization - eliminate trivial phi nodes */
24012691
if (insn->opcode == OP_phi && insn->phi_ops) {
2402-
/* Count unique operands */
2692+
/* Count unique operands and check for constants */
24032693
var_t *first_var = insn->phi_ops->var;
24042694
bool all_same = true;
2405-
2406-
for (phi_operand_t *op = insn->phi_ops->next; op;
2407-
op = op->next) {
2408-
if (op->var != first_var) {
2695+
bool all_const = true;
2696+
int const_val = 0;
2697+
int num_ops = 0;
2698+
2699+
for (phi_operand_t *op = insn->phi_ops; op; op = op->next) {
2700+
num_ops++;
2701+
/* Check if all same variable */
2702+
if (op != insn->phi_ops && op->var != first_var)
24092703
all_same = false;
2410-
break;
2704+
/* Check if all same constant */
2705+
if (op->var && op->var->is_const) {
2706+
if (op == insn->phi_ops) {
2707+
const_val = op->var->init_val;
2708+
} else if (op->var->init_val != const_val) {
2709+
all_const = false;
2710+
}
2711+
} else {
2712+
all_const = false;
24112713
}
24122714
}
24132715

2414-
/* Replace trivial phi with simple assignment */
2415-
if (all_same && first_var) {
2716+
/* Eliminate trivial phi - all operands are the same var */
2717+
if (all_same && first_var && num_ops > 0) {
24162718
insn->opcode = OP_assign;
24172719
insn->rs1 = first_var;
24182720
insn->rs2 = NULL;
2721+
insn->phi_ops = NULL;
2722+
}
2723+
/* Constant phi - all operands have the same constant value
2724+
*/
2725+
else if (all_const && num_ops > 0 && insn->rd) {
2726+
insn->opcode = OP_load_constant;
2727+
insn->rd->is_const = true;
2728+
insn->rd->init_val = const_val;
2729+
insn->rs1 = NULL;
2730+
insn->rs2 = NULL;
2731+
insn->phi_ops = NULL;
24192732
}
24202733
}
24212734

2422-
/* Simple strength reduction for division by power of 2 */
2423-
if (insn->opcode == OP_div && insn->rs2 &&
2424-
insn->rs2->is_const) {
2425-
int val = insn->rs2->init_val;
2426-
/* Check if power of 2 */
2427-
if (val > 0 && (val & (val - 1)) == 0) {
2428-
/* Convert to right shift */
2735+
/* Strength reduction for division and modulo by power of 2 */
2736+
if (insn->rs2 && insn->rs2->is_const) {
2737+
int divisor = insn->rs2->init_val;
2738+
2739+
/* Check if divisor is positive power of 2 */
2740+
if (divisor > 0 && (divisor & (divisor - 1)) == 0) {
2741+
/* Find shift amount */
24292742
int shift = 0;
2430-
while (val > 1) {
2431-
val >>= 1;
2743+
int tmp = divisor;
2744+
while (tmp > 1) {
2745+
tmp >>= 1;
24322746
shift++;
24332747
}
2434-
insn->opcode = OP_rshift;
2435-
insn->rs2->init_val = shift;
2436-
}
2437-
}
24382748

2439-
/* Simple strength reduction for modulo by power of 2 */
2440-
if (insn->opcode == OP_mod && insn->rs2 &&
2441-
insn->rs2->is_const) {
2442-
int val = insn->rs2->init_val;
2443-
/* Check if power of 2 */
2444-
if (val > 0 && (val & (val - 1)) == 0) {
2445-
/* Convert to bitwise AND */
2446-
insn->opcode = OP_bit_and;
2447-
insn->rs2->init_val = val - 1;
2749+
if (insn->opcode == OP_div) {
2750+
/* Convert division to right shift */
2751+
insn->opcode = OP_rshift;
2752+
insn->rs2->init_val = shift;
2753+
} else if (insn->opcode == OP_mod) {
2754+
/* Convert modulo to bitwise AND */
2755+
insn->opcode = OP_bit_and;
2756+
insn->rs2->init_val = divisor - 1;
2757+
}
24482758
}
24492759
}
24502760

0 commit comments

Comments
 (0)