@@ -2397,54 +2397,364 @@ void optimize(void)
23972397 }
23982398 }
23992399
2400+ /* Extended load-after-store forwarding with multi-instruction
2401+ * analysis */
2402+ /* Search backwards for any store to the same location */
2403+ if (insn -> opcode == OP_load && insn -> rs1 ) {
2404+ insn_t * search = insn -> prev ;
2405+ int search_limit = 10 ; /* Look back up to 10 instructions */
2406+
2407+ while (search && search_limit > 0 ) {
2408+ /* Found a store to the same location */
2409+ if ((search -> opcode == OP_store ||
2410+ search -> opcode == OP_global_store ) &&
2411+ search -> rd == insn -> rs1 && search -> rs1 ) {
2412+ /* Check if the stored location is not modified
2413+ * between store and load */
2414+ bool safe_to_forward = true;
2415+ insn_t * check = search -> next ;
2416+ while (check && check != insn ) {
2417+ /* Check for any instruction that might modify
2418+ * the location */
2419+ if (check -> rd == insn -> rs1 &&
2420+ (check -> opcode == OP_store ||
2421+ check -> opcode == OP_global_store ||
2422+ check -> opcode == OP_write )) {
2423+ safe_to_forward = false;
2424+ break ;
2425+ }
2426+ /* Check for function calls that might have side
2427+ * effects */
2428+ if (check -> opcode == OP_call ||
2429+ check -> opcode == OP_indirect ) {
2430+ safe_to_forward = false;
2431+ break ;
2432+ }
2433+ check = check -> next ;
2434+ }
2435+
2436+ if (safe_to_forward ) {
2437+ /* Forward the stored value */
2438+ insn -> opcode = OP_assign ;
2439+ insn -> rs1 = search -> rs1 ;
2440+ insn -> rs2 = NULL ;
2441+ break ;
2442+ }
2443+ }
2444+
2445+ /* Stop if we hit a call or branch */
2446+ if (search -> opcode == OP_call ||
2447+ search -> opcode == OP_branch ||
2448+ search -> opcode == OP_jump ||
2449+ search -> opcode == OP_indirect ) {
2450+ break ;
2451+ }
2452+
2453+ search = search -> prev ;
2454+ search_limit -- ;
2455+ }
2456+ }
2457+
2458+ /* Redundant load elimination */
2459+ /* If we load from the same location multiple times, reuse the
2460+ * first load
2461+ */
2462+ if (insn -> opcode == OP_load && insn -> rs1 && insn -> rd ) {
2463+ insn_t * search = bb -> insn_list .head ;
2464+
2465+ while (search && search != insn ) {
2466+ /* Found an earlier load from the same location */
2467+ if (search -> opcode == OP_load &&
2468+ search -> rs1 == insn -> rs1 && search -> rd ) {
2469+ /* Check if the location wasn't modified between
2470+ * loads */
2471+ bool safe_to_reuse = true;
2472+ insn_t * check = search -> next ;
2473+
2474+ while (check && check != insn ) {
2475+ if ((check -> opcode == OP_store ||
2476+ check -> opcode == OP_global_store ||
2477+ check -> opcode == OP_write ) &&
2478+ check -> rd == insn -> rs1 ) {
2479+ safe_to_reuse = false;
2480+ break ;
2481+ }
2482+ /* Function calls might modify memory */
2483+ if (check -> opcode == OP_call ||
2484+ check -> opcode == OP_indirect ) {
2485+ safe_to_reuse = false;
2486+ break ;
2487+ }
2488+ check = check -> next ;
2489+ }
2490+
2491+ if (safe_to_reuse ) {
2492+ /* Replace load with assignment from previous
2493+ * load */
2494+ insn -> opcode = OP_assign ;
2495+ insn -> rs1 = search -> rd ;
2496+ insn -> rs2 = NULL ;
2497+ break ;
2498+ }
2499+ }
2500+ search = search -> next ;
2501+ }
2502+ }
2503+
2504+ /* Self-operation optimizations at SSA level */
2505+ /* These patterns must be handled at SSA level for correct
2506+ * self-hosting despite some duplication with peephole optimizer
2507+ */
2508+ if (insn -> rs1 && insn -> rs2 && insn -> rs1 == insn -> rs2 ) {
2509+ /* x - x = 0 */
2510+ if (insn -> opcode == OP_sub && insn -> rd ) {
2511+ insn -> opcode = OP_load_constant ;
2512+ insn -> rd -> is_const = true;
2513+ insn -> rd -> init_val = 0 ;
2514+ insn -> rs1 = NULL ;
2515+ insn -> rs2 = NULL ;
2516+ }
2517+ /* x ^ x = 0 */
2518+ else if (insn -> opcode == OP_bit_xor && insn -> rd ) {
2519+ insn -> opcode = OP_load_constant ;
2520+ insn -> rd -> is_const = true;
2521+ insn -> rd -> init_val = 0 ;
2522+ insn -> rs1 = NULL ;
2523+ insn -> rs2 = NULL ;
2524+ }
2525+ /* x & x = x */
2526+ else if (insn -> opcode == OP_bit_and && insn -> rd ) {
2527+ insn -> opcode = OP_assign ;
2528+ insn -> rs2 = NULL ;
2529+ }
2530+ /* x | x = x */
2531+ else if (insn -> opcode == OP_bit_or && insn -> rd ) {
2532+ insn -> opcode = OP_assign ;
2533+ insn -> rs2 = NULL ;
2534+ }
2535+ /* x / x = 1 (if x != 0) */
2536+ else if (insn -> opcode == OP_div && insn -> rd ) {
2537+ insn -> opcode = OP_load_constant ;
2538+ insn -> rd -> is_const = true;
2539+ insn -> rd -> init_val = 1 ;
2540+ insn -> rs1 = NULL ;
2541+ insn -> rs2 = NULL ;
2542+ }
2543+ /* x % x = 0 */
2544+ else if (insn -> opcode == OP_mod && insn -> rd ) {
2545+ insn -> opcode = OP_load_constant ;
2546+ insn -> rd -> is_const = true;
2547+ insn -> rd -> init_val = 0 ;
2548+ insn -> rs1 = NULL ;
2549+ insn -> rs2 = NULL ;
2550+ }
2551+ /* x == x = 1 */
2552+ else if (insn -> opcode == OP_eq && insn -> rd ) {
2553+ insn -> opcode = OP_load_constant ;
2554+ insn -> rd -> is_const = true;
2555+ insn -> rd -> init_val = 1 ;
2556+ insn -> rs1 = NULL ;
2557+ insn -> rs2 = NULL ;
2558+ }
2559+ /* x != x = 0 */
2560+ else if (insn -> opcode == OP_neq && insn -> rd ) {
2561+ insn -> opcode = OP_load_constant ;
2562+ insn -> rd -> is_const = true;
2563+ insn -> rd -> init_val = 0 ;
2564+ insn -> rs1 = NULL ;
2565+ insn -> rs2 = NULL ;
2566+ }
2567+ /* x < x = 0, x > x = 0 */
2568+ else if ((insn -> opcode == OP_lt || insn -> opcode == OP_gt ) &&
2569+ insn -> rd ) {
2570+ insn -> opcode = OP_load_constant ;
2571+ insn -> rd -> is_const = true;
2572+ insn -> rd -> init_val = 0 ;
2573+ insn -> rs1 = NULL ;
2574+ insn -> rs2 = NULL ;
2575+ }
2576+ /* x <= x = 1, x >= x = 1 */
2577+ else if ((insn -> opcode == OP_leq ||
2578+ insn -> opcode == OP_geq ) &&
2579+ insn -> rd ) {
2580+ insn -> opcode = OP_load_constant ;
2581+ insn -> rd -> is_const = true;
2582+ insn -> rd -> init_val = 1 ;
2583+ insn -> rs1 = NULL ;
2584+ insn -> rs2 = NULL ;
2585+ }
2586+ }
2587+
2588+ /* Comprehensive algebraic simplifications with identity
2589+ * operations */
2590+ if (insn -> rs2 && insn -> rs2 -> is_const && insn -> rd ) {
2591+ int val = insn -> rs2 -> init_val ;
2592+
2593+ /* x + 0 = x, x - 0 = x, x | 0 = x, x ^ 0 = x */
2594+ if (val == 0 ) {
2595+ if (insn -> opcode == OP_add || insn -> opcode == OP_sub ||
2596+ insn -> opcode == OP_bit_or ||
2597+ insn -> opcode == OP_bit_xor ) {
2598+ insn -> opcode = OP_assign ;
2599+ insn -> rs2 = NULL ;
2600+ }
2601+ /* x * 0 = 0, x & 0 = 0 */
2602+ else if (insn -> opcode == OP_mul ||
2603+ insn -> opcode == OP_bit_and ) {
2604+ insn -> opcode = OP_load_constant ;
2605+ insn -> rd -> is_const = true;
2606+ insn -> rd -> init_val = 0 ;
2607+ insn -> rs1 = NULL ;
2608+ insn -> rs2 = NULL ;
2609+ }
2610+ /* x << 0 = x, x >> 0 = x */
2611+ else if (insn -> opcode == OP_lshift ||
2612+ insn -> opcode == OP_rshift ) {
2613+ insn -> opcode = OP_assign ;
2614+ insn -> rs2 = NULL ;
2615+ }
2616+ }
2617+ /* x * 1 = x, x / 1 = x */
2618+ else if (val == 1 ) {
2619+ if (insn -> opcode == OP_mul || insn -> opcode == OP_div ) {
2620+ insn -> opcode = OP_assign ;
2621+ insn -> rs2 = NULL ;
2622+ }
2623+ }
2624+ /* x & -1 = x (all bits set) */
2625+ else if (val == -1 ) {
2626+ if (insn -> opcode == OP_bit_and ) {
2627+ insn -> opcode = OP_assign ;
2628+ insn -> rs2 = NULL ;
2629+ }
2630+ /* x | -1 = -1 */
2631+ else if (insn -> opcode == OP_bit_or ) {
2632+ insn -> opcode = OP_load_constant ;
2633+ insn -> rd -> is_const = true;
2634+ insn -> rd -> init_val = -1 ;
2635+ insn -> rs1 = NULL ;
2636+ insn -> rs2 = NULL ;
2637+ }
2638+ }
2639+ /* x * -1 = -x */
2640+ else if (val == -1 && insn -> opcode == OP_mul ) {
2641+ insn -> opcode = OP_negate ;
2642+ insn -> rs2 = NULL ;
2643+ }
2644+ }
2645+
2646+ /* Simplifications with rs1 constant */
2647+ if (insn -> rs1 && insn -> rs1 -> is_const && insn -> rd ) {
2648+ int val = insn -> rs1 -> init_val ;
2649+
2650+ /* 0 + x = x, 0 | x = x, 0 ^ x = x */
2651+ if (val == 0 ) {
2652+ if (insn -> opcode == OP_add ||
2653+ insn -> opcode == OP_bit_or ||
2654+ insn -> opcode == OP_bit_xor ) {
2655+ insn -> opcode = OP_assign ;
2656+ insn -> rs1 = insn -> rs2 ;
2657+ insn -> rs2 = NULL ;
2658+ }
2659+ /* 0 * x = 0, 0 & x = 0, 0 / x = 0 */
2660+ else if (insn -> opcode == OP_mul ||
2661+ insn -> opcode == OP_bit_and ||
2662+ insn -> opcode == OP_div ) {
2663+ insn -> opcode = OP_load_constant ;
2664+ insn -> rd -> is_const = true;
2665+ insn -> rd -> init_val = 0 ;
2666+ insn -> rs1 = NULL ;
2667+ insn -> rs2 = NULL ;
2668+ }
2669+ /* 0 - x = -x */
2670+ else if (insn -> opcode == OP_sub ) {
2671+ insn -> opcode = OP_negate ;
2672+ insn -> rs1 = insn -> rs2 ;
2673+ insn -> rs2 = NULL ;
2674+ }
2675+ }
2676+ /* 1 * x = x */
2677+ else if (val == 1 && insn -> opcode == OP_mul ) {
2678+ insn -> opcode = OP_assign ;
2679+ insn -> rs1 = insn -> rs2 ;
2680+ insn -> rs2 = NULL ;
2681+ }
2682+ /* -1 & x = x */
2683+ else if (val == -1 && insn -> opcode == OP_bit_and ) {
2684+ insn -> opcode = OP_assign ;
2685+ insn -> rs1 = insn -> rs2 ;
2686+ insn -> rs2 = NULL ;
2687+ }
2688+ }
2689+
24002690 /* Phi node optimization - eliminate trivial phi nodes */
24012691 if (insn -> opcode == OP_phi && insn -> phi_ops ) {
2402- /* Count unique operands */
2692+ /* Count unique operands and check for constants */
24032693 var_t * first_var = insn -> phi_ops -> var ;
24042694 bool all_same = true;
2405-
2406- for (phi_operand_t * op = insn -> phi_ops -> next ; op ;
2407- op = op -> next ) {
2408- if (op -> var != first_var ) {
2695+ bool all_const = true;
2696+ int const_val = 0 ;
2697+ int num_ops = 0 ;
2698+
2699+ for (phi_operand_t * op = insn -> phi_ops ; op ; op = op -> next ) {
2700+ num_ops ++ ;
2701+ /* Check if all same variable */
2702+ if (op != insn -> phi_ops && op -> var != first_var )
24092703 all_same = false;
2410- break ;
2704+ /* Check if all same constant */
2705+ if (op -> var && op -> var -> is_const ) {
2706+ if (op == insn -> phi_ops ) {
2707+ const_val = op -> var -> init_val ;
2708+ } else if (op -> var -> init_val != const_val ) {
2709+ all_const = false;
2710+ }
2711+ } else {
2712+ all_const = false;
24112713 }
24122714 }
24132715
2414- /* Replace trivial phi with simple assignment */
2415- if (all_same && first_var ) {
2716+ /* Eliminate trivial phi - all operands are the same var */
2717+ if (all_same && first_var && num_ops > 0 ) {
24162718 insn -> opcode = OP_assign ;
24172719 insn -> rs1 = first_var ;
24182720 insn -> rs2 = NULL ;
2721+ insn -> phi_ops = NULL ;
2722+ }
2723+ /* Constant phi - all operands have the same constant value
2724+ */
2725+ else if (all_const && num_ops > 0 && insn -> rd ) {
2726+ insn -> opcode = OP_load_constant ;
2727+ insn -> rd -> is_const = true;
2728+ insn -> rd -> init_val = const_val ;
2729+ insn -> rs1 = NULL ;
2730+ insn -> rs2 = NULL ;
2731+ insn -> phi_ops = NULL ;
24192732 }
24202733 }
24212734
2422- /* Simple strength reduction for division by power of 2 */
2423- if (insn -> opcode == OP_div && insn -> rs2 &&
2424- insn -> rs2 -> is_const ) {
2425- int val = insn -> rs2 -> init_val ;
2426- /* Check if power of 2 */
2427- if (val > 0 && (val & (val - 1 )) == 0 ) {
2428- /* Convert to right shift */
2735+ /* Strength reduction for division and modulo by power of 2 */
2736+ if (insn -> rs2 && insn -> rs2 -> is_const ) {
2737+ int divisor = insn -> rs2 -> init_val ;
2738+
2739+ /* Check if divisor is positive power of 2 */
2740+ if (divisor > 0 && (divisor & (divisor - 1 )) == 0 ) {
2741+ /* Find shift amount */
24292742 int shift = 0 ;
2430- while (val > 1 ) {
2431- val >>= 1 ;
2743+ int tmp = divisor ;
2744+ while (tmp > 1 ) {
2745+ tmp >>= 1 ;
24322746 shift ++ ;
24332747 }
2434- insn -> opcode = OP_rshift ;
2435- insn -> rs2 -> init_val = shift ;
2436- }
2437- }
24382748
2439- /* Simple strength reduction for modulo by power of 2 */
2440- if ( insn -> opcode == OP_mod && insn -> rs2 &&
2441- insn -> rs2 -> is_const ) {
2442- int val = insn -> rs2 -> init_val ;
2443- /* Check if power of 2 */
2444- if ( val > 0 && ( val & ( val - 1 )) == 0 ) {
2445- /* Convert to bitwise AND */
2446- insn -> opcode = OP_bit_and ;
2447- insn -> rs2 -> init_val = val - 1 ;
2749+ if ( insn -> opcode == OP_div ) {
2750+ /* Convert division to right shift */
2751+ insn -> opcode = OP_rshift ;
2752+ insn -> rs2 -> init_val = shift ;
2753+ } else if ( insn -> opcode == OP_mod ) {
2754+ /* Convert modulo to bitwise AND */
2755+ insn -> opcode = OP_bit_and ;
2756+ insn -> rs2 -> init_val = divisor - 1 ;
2757+ }
24482758 }
24492759 }
24502760
0 commit comments