diff --git a/src/peephole.c b/src/peephole.c
index f3282b71..42d8099a 100644
--- a/src/peephole.c
+++ b/src/peephole.c
@@ -242,13 +242,643 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
     return false;
 }
 
+/* Redundant move elimination.
+ * Eliminates unnecessary move operations that are overwritten or redundant.
+ */
+bool redundant_move_elim(ph2_ir_t *ph2_ir)
+{
+    ph2_ir_t *next = ph2_ir->next;
+    if (!next)
+        return false;
+
+    /* Pattern 1: Consecutive assignments to the same destination
+     * {mov rd, rs1; mov rd, rs2} → {mov rd, rs2}
+     * The first move is completely overwritten by the second.
+     */
+    if (ph2_ir->op == OP_assign && next->op == OP_assign &&
+        ph2_ir->dest == next->dest) {
+        /* Replace the first move with the second, then skip the second */
+        ph2_ir->src0 = next->src0;
+        ph2_ir->next = next->next;
+        return true;
+    }
+
+    /* Pattern 2: Redundant load immediately overwritten
+     * {load rd, offset; mov rd, rs} → {mov rd, rs}
+     * Loading a value that's immediately replaced is wasteful.
+     */
+    if ((ph2_ir->op == OP_load || ph2_ir->op == OP_global_load) &&
+        next->op == OP_assign && ph2_ir->dest == next->dest) {
+        /* Replace the load with the move */
+        ph2_ir->op = OP_assign;
+        ph2_ir->src0 = next->src0;
+        ph2_ir->src1 = 0; /* Clear unused field */
+        ph2_ir->next = next->next;
+        return true;
+    }
+
+    /* Pattern 3: Constant load immediately overwritten
+     * {li rd, imm; mov rd, rs} → {mov rd, rs}
+     * Loading a constant that's immediately replaced is wasteful.
+     */
+    if (ph2_ir->op == OP_load_constant && next->op == OP_assign &&
+        ph2_ir->dest == next->dest) {
+        /* Replace the constant load with the move */
+        ph2_ir->op = OP_assign;
+        ph2_ir->src0 = next->src0;
+        ph2_ir->next = next->next;
+        return true;
+    }
+
+    /* Pattern 4: Consecutive loads into the same register
+     * {load rd, offset1; load rd, offset2} → {load rd, offset2}
+     * The first load is pointless if immediately overwritten.
+     */
+    if ((ph2_ir->op == OP_load || ph2_ir->op == OP_global_load) &&
+        (next->op == OP_load || next->op == OP_global_load) &&
+        ph2_ir->dest == next->dest) {
+        /* Keep only the second load */
+        ph2_ir->op = next->op;
+        ph2_ir->src0 = next->src0;
+        ph2_ir->src1 = next->src1;
+        ph2_ir->next = next->next;
+        return true;
+    }
+
+    /* Pattern 5: Consecutive constant loads into the same register
+     * (kept for completeness; earlier passes may already catch this)
+     * {li rd, imm1; li rd, imm2} → {li rd, imm2}
+     */
+    if (ph2_ir->op == OP_load_constant && next->op == OP_load_constant &&
+        ph2_ir->dest == next->dest) {
+        /* Keep only the second constant */
+        ph2_ir->src0 = next->src0;
+        ph2_ir->next = next->next;
+        return true;
+    }
+
+    /* Pattern 6: Move followed by a load into the same register
+     * {mov rd, rs; load rd, offset} → {load rd, offset}
+     * The move is pointless if immediately overwritten by the load.
+     */
+    if (ph2_ir->op == OP_assign &&
+        (next->op == OP_load || next->op == OP_global_load) &&
+        ph2_ir->dest == next->dest) {
+        /* Replace move+load with just the load */
+        ph2_ir->op = next->op;
+        ph2_ir->src0 = next->src0;
+        ph2_ir->src1 = next->src1;
+        ph2_ir->next = next->next;
+        return true;
+    }
+
+    /* Pattern 7: Move followed by a constant load into the same register
+     * {mov rd, rs; li rd, imm} → {li rd, imm}
+     * The move is pointless if immediately overwritten by the constant.
+     */
+    if (ph2_ir->op == OP_assign && next->op == OP_load_constant &&
+        ph2_ir->dest == next->dest) {
+        /* Replace move+li with just the li */
+        ph2_ir->op = OP_load_constant;
+        ph2_ir->src0 = next->src0;
+        ph2_ir->src1 = 0; /* Clear unused field */
+        ph2_ir->next = next->next;
+        return true;
+    }
+
+    return false;
+}
+
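+/* Worked example (editor's illustrative sketch; the registers and the stack
+ * offset are hypothetical): for source such as "x = a; x = b;", the
+ * allocator may emit
+ *     {load r1, offset; mov r1, r2}
+ * Pattern 2 rewrites the pair to the single instruction
+ *     {mov r1, r2}
+ * because the loaded value is dead the moment it is overwritten.
+ */
+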
+/* Load/store elimination for consecutive memory operations.
+ * Removes redundant loads and dead stores that access the same memory
+ * location. Conservative implementation to maintain bootstrap stability.
+ */
+bool eliminate_load_store_pairs(ph2_ir_t *ph2_ir)
+{
+    ph2_ir_t *next = ph2_ir->next;
+    if (!next)
+        return false;
+
+    /* Only handle local loads/stores for now (not globals) to be safe */
+
+    /* Pattern 1: Consecutive stores to the same local location
+     * {store [addr], val1; store [addr], val2} → {store [addr], val2}
+     * The first store is dead if immediately overwritten.
+     */
+    if (ph2_ir->op == OP_store && next->op == OP_store) {
+        /* Check if storing to the same memory location */
+        if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1 &&
+            ph2_ir->src0 >= 0 && ph2_ir->src1 >= 0) {
+            /* Remove the first store - it is dead */
+            ph2_ir->dest = next->dest;
+            ph2_ir->next = next->next;
+            return true;
+        }
+    }
+
+    /* Pattern 2: Redundant consecutive loads from the same local location
+     * {load rd1, [addr]; load rd2, [addr]} → {load rd1, [addr]; mov rd2, rd1}
+     * The second load can reuse the first load's result.
+     * Only applied when the addresses are simple (not complex expressions).
+     */
+    if (ph2_ir->op == OP_load && next->op == OP_load) {
+        /* Check if loading from the same memory location */
+        if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1 &&
+            ph2_ir->src0 >= 0 && ph2_ir->src1 >= 0) {
+            /* Replace the second load with a move */
+            next->op = OP_assign;
+            next->src0 = ph2_ir->dest; /* Result of the first load */
+            next->src1 = 0;
+            return true;
+        }
+    }
+
+    /* Pattern 3: Store followed by a load from the same location
+     * (store-to-load forwarding)
+     * {store [addr], val; load rd, [addr]} → {store [addr], val; mov rd, val}
+     * The load can use the stored value directly.
+     */
+    if (ph2_ir->op == OP_store && next->op == OP_load) {
+        /* Check if accessing the same memory location */
+        if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1 &&
+            ph2_ir->src0 >= 0 && ph2_ir->dest >= 0) {
+            /* Replace the load with a move of the stored value */
+            next->op = OP_assign;
+            next->src0 = ph2_ir->dest; /* Value that was stored */
+            next->src1 = 0;
+            return true;
+        }
+    }
+
+    /* Pattern 4: Load followed by a redundant store of the same value
+     * {load rd, [addr]; store [addr], rd} → {load rd, [addr]}
+     * The store is redundant if it writes back the just-loaded value.
+     */
+    if (ph2_ir->op == OP_load && next->op == OP_store) {
+        /* Check if storing the value just loaded from the same location */
+        if (ph2_ir->dest == next->dest && ph2_ir->src0 == next->src0 &&
+            ph2_ir->src1 == next->src1 && ph2_ir->src0 >= 0) {
+            /* Remove the redundant store */
+            ph2_ir->next = next->next;
+            return true;
+        }
+    }
+
+    /* Pattern 5: Global store/load optimizations (carefully enabled) */
+    if (ph2_ir->op == OP_global_store && next->op == OP_global_store) {
+        /* Consecutive global stores to the same location */
+        if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1) {
+            /* Remove the first store - it is dead */
+            ph2_ir->dest = next->dest;
+            ph2_ir->next = next->next;
+            return true;
+        }
+    }
+
+    if (ph2_ir->op == OP_global_load && next->op == OP_global_load) {
+        /* Consecutive global loads from the same location */
+        if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1) {
+            /* Replace the second load with a move */
+            next->op = OP_assign;
+            next->src0 = ph2_ir->dest;
+            next->src1 = 0;
+            return true;
+        }
+    }
+
+    return false;
+}
+
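+/* Worked example (editor's illustrative sketch; registers and the address
+ * are hypothetical): for "x = v; y = x;" with x kept in memory,
+ *     {store [x], r1; load r2, [x]}
+ * Pattern 3 forwards the stored value and rewrites the load:
+ *     {store [x], r1; mov r2, r1}
+ * which avoids a round trip through memory.
+ */
+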
+/* Algebraic simplification: apply mathematical identities to simplify
+ * expressions.
+ *
+ * This function handles patterns that SSA cannot see:
+ * - Self-operations on registers (x-x, x^x, x|x, x&x)
+ * - These patterns emerge after register allocation, when different
+ *   variables are assigned to the same register
+ *
+ * SSA handles: constant folding with known values (5+3 → 8)
+ * Peephole handles: register-based patterns (r1-r1 → 0)
+ *
+ * Returns true if an optimization was applied.
+ */
+bool algebraic_simplification(ph2_ir_t *ph2_ir)
+{
+    if (!ph2_ir)
+        return false;
+
+    /* NOTE: SSA's const_folding handles constant operations with known
+     * values. We focus on register-based patterns that appear after
+     * register allocation.
+     */
+
+    /* Pattern 1: Self-subtraction → 0
+     * x - x = 0 (for register operands)
+     */
+    if (ph2_ir->op == OP_sub && ph2_ir->src0 == ph2_ir->src1) {
+        ph2_ir->op = OP_load_constant;
+        ph2_ir->src0 = 0; /* result is 0 */
+        ph2_ir->src1 = 0; /* clear unused field */
+        return true;
+    }
+
+    /* Pattern 2: Self-XOR → 0
+     * x ^ x = 0 (for register operands)
+     */
+    if (ph2_ir->op == OP_bit_xor && ph2_ir->src0 == ph2_ir->src1) {
+        ph2_ir->op = OP_load_constant;
+        ph2_ir->src0 = 0; /* result is 0 */
+        ph2_ir->src1 = 0; /* clear unused field */
+        return true;
+    }
+
+    /* Pattern 3: Self-OR → x
+     * x | x = x (identity operation for register operands)
+     */
+    if (ph2_ir->op == OP_bit_or && ph2_ir->src0 == ph2_ir->src1) {
+        ph2_ir->op = OP_assign;
+        /* src0 already contains x; just move it */
+        ph2_ir->src1 = 0; /* clear unused field */
+        return true;
+    }
+
+    /* Pattern 4: Self-AND → x
+     * x & x = x (identity operation for register operands)
+     */
+    if (ph2_ir->op == OP_bit_and && ph2_ir->src0 == ph2_ir->src1) {
+        ph2_ir->op = OP_assign;
+        /* src0 already contains x; just move it */
+        ph2_ir->src1 = 0; /* clear unused field */
+        return true;
+    }
+
+    /* NOTE: Arithmetic identity patterns (x+0, x*1, x*0, x-0) are already
+     * handled by SSA's const_folding() and by insn_fusion(). We focus on
+     * register-level patterns that SSA cannot see.
+     */
+
+    return false;
+}
+
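+/* Worked example (editor's illustrative sketch; registers are
+ * hypothetical): after register allocation, two variables holding the same
+ * value may share a register, so "x - y" can surface as
+ *     {sub r3, r1, r1}
+ * which SSA could not fold. Pattern 1 rewrites it to
+ *     {li r3, 0}
+ * Self-XOR is folded the same way; self-OR/AND become plain moves.
+ */
+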
+/* Division/modulo strength reduction: optimize division and modulo by a
+ * power of 2.
+ *
+ * This pattern is unique to the peephole optimizer. SSA cannot perform it
+ * because it operates on virtual registers before the actual constant
+ * values are loaded.
+ *
+ * Returns true if an optimization was applied.
+ */
+bool strength_reduction(ph2_ir_t *ph2_ir)
+{
+    if (!ph2_ir || !ph2_ir->next)
+        return false;
+
+    ph2_ir_t *next = ph2_ir->next;
+
+    /* Check for a constant load followed by division or modulo */
+    if (ph2_ir->op != OP_load_constant)
+        return false;
+
+    int value = ph2_ir->src0;
+
+    /* Check if the value is a power of 2 */
+    if (value <= 0 || (value & (value - 1)) != 0)
+        return false;
+
+    /* Calculate the shift amount for the power of 2 */
+    int shift = 0;
+    int tmp = value;
+    while (tmp > 1) {
+        shift++;
+        tmp >>= 1;
+    }
+
+    /* Pattern 1: Division by a power of 2 → right shift
+     * x / 2^n = x >> n
+     * NOTE: only exact for unsigned/non-negative x; signed division
+     * truncates toward zero, while an arithmetic shift rounds toward
+     * negative infinity.
+     */
+    if (next->op == OP_div && next->src1 == ph2_ir->dest) {
+        /* Convert the division into a right shift */
+        ph2_ir->src0 = shift; /* Load the shift amount instead */
+        next->op = OP_rshift;
+        return true;
+    }
+
+    /* Pattern 2: Modulo by a power of 2 → bitwise AND
+     * x % 2^n = x & (2^n - 1), again assuming a non-negative x
+     */
+    if (next->op == OP_mod && next->src1 == ph2_ir->dest) {
+        /* Convert the modulo into a bitwise AND */
+        ph2_ir->src0 = value - 1; /* Load the mask (2^n - 1) */
+        next->op = OP_bit_and;
+        return true;
+    }
+
+    /* Pattern 3: Multiplication by a power of 2 → left shift
+     * x * 2^n = x << n
+     */
+    if (next->op == OP_mul) {
+        if (next->src0 == ph2_ir->dest) {
+            /* 2^n * x = x << n */
+            ph2_ir->src0 = shift;      /* Load the shift amount */
+            next->op = OP_lshift;
+            next->src0 = next->src1;   /* Move x to src0 */
+            next->src1 = ph2_ir->dest; /* Shift amount in src1 */
+            return true;
+        } else if (next->src1 == ph2_ir->dest) {
+            /* x * 2^n = x << n */
+            ph2_ir->src0 = shift; /* Load the shift amount */
+            next->op = OP_lshift;
+            return true;
+        }
+    }
+
+    return false;
+}
+
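+/* Worked example (editor's illustrative sketch; registers are
+ * hypothetical): for "x / 8" the front end emits
+ *     {li r2, 8; div r3, r1, r2}
+ * Since 8 = 1 << 3, the pass rewrites the pair to
+ *     {li r2, 3; rshift r3, r1, r2}
+ * Likewise "x % 8" becomes {li r2, 7; and r3, r1, r2}, where the mask is
+ * 2^3 - 1 = 7.
+ */
+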
+/* Comparison optimization: simplify comparison patterns.
+ * Focus on register-based patterns that SSA's SCCP misses.
+ * Returns true if an optimization was applied.
+ */
+bool comparison_optimization(ph2_ir_t *ph2_ir)
+{
+    if (!ph2_ir)
+        return false;
+
+    /* NOTE: SSA's SCCP handles constant comparisons, so we focus on
+     * register-based self-comparisons after register allocation.
+     */
+
+    /* Pattern 1: Self-comparison is always false for !=
+     * x != x → 0 (for register operands)
+     */
+    if (ph2_ir->op == OP_neq && ph2_ir->src0 == ph2_ir->src1) {
+        ph2_ir->op = OP_load_constant;
+        ph2_ir->src0 = 0; /* always false */
+        ph2_ir->src1 = 0;
+        return true;
+    }
+
+    /* Pattern 2: Self-comparison is always true for ==
+     * x == x → 1 (for register operands)
+     */
+    if (ph2_ir->op == OP_eq && ph2_ir->src0 == ph2_ir->src1) {
+        ph2_ir->op = OP_load_constant;
+        ph2_ir->src0 = 1; /* always true */
+        ph2_ir->src1 = 0;
+        return true;
+    }
+
+    /* Pattern 3: Self-comparison for less-than
+     * x < x → 0 (always false)
+     */
+    if (ph2_ir->op == OP_lt && ph2_ir->src0 == ph2_ir->src1) {
+        ph2_ir->op = OP_load_constant;
+        ph2_ir->src0 = 0; /* always false */
+        ph2_ir->src1 = 0;
+        return true;
+    }
+
+    /* Pattern 4: Self-comparison for greater-than
+     * x > x → 0 (always false)
+     */
+    if (ph2_ir->op == OP_gt && ph2_ir->src0 == ph2_ir->src1) {
+        ph2_ir->op = OP_load_constant;
+        ph2_ir->src0 = 0; /* always false */
+        ph2_ir->src1 = 0;
+        return true;
+    }
+
+    /* Pattern 5: Self-comparison for less-equal
+     * x <= x → 1 (always true)
+     */
+    if (ph2_ir->op == OP_leq && ph2_ir->src0 == ph2_ir->src1) {
+        ph2_ir->op = OP_load_constant;
+        ph2_ir->src0 = 1; /* always true */
+        ph2_ir->src1 = 0;
+        return true;
+    }
+
+    /* Pattern 6: Self-comparison for greater-equal
+     * x >= x → 1 (always true)
+     */
+    if (ph2_ir->op == OP_geq && ph2_ir->src0 == ph2_ir->src1) {
+        ph2_ir->op = OP_load_constant;
+        ph2_ir->src0 = 1; /* always true */
+        ph2_ir->src1 = 0;
+        return true;
+    }
+
+    return false;
+}
+
+/* Bitwise operation optimization: simplify bitwise patterns.
+ * Returns true if an optimization was applied.
+ */
+bool bitwise_optimization(ph2_ir_t *ph2_ir)
+{
+    if (!ph2_ir || !ph2_ir->next)
+        return false;
+
+    ph2_ir_t *next = ph2_ir->next;
+
+    /* Pattern 1: Double negation → identity
+     * -(-x) = x (OP_negate is arithmetic negation; the identity holds
+     * under two's-complement wraparound)
+     */
+    if (ph2_ir->op == OP_negate && next->op == OP_negate &&
+        next->src0 == ph2_ir->dest) {
+        /* Replace the pair with a simple assignment */
+        ph2_ir->op = OP_assign;
+        ph2_ir->dest = next->dest;
+        ph2_ir->next = next->next;
+        return true;
+    }
+
+    /* Pattern 2: AND with an all-ones mask → identity
+     * x & 0xFFFFFFFF = x (for 32-bit)
+     * The constant load is kept in place: its register may still be
+     * referenced, and a later pass can remove it once it is provably dead.
+     */
+    if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == -1 &&
+        next->op == OP_bit_and && next->src1 == ph2_ir->dest) {
+        /* Replace the AND with an assignment */
+        next->op = OP_assign;
+        next->src1 = 0;
+        return true;
+    }
+
+    /* Pattern 3: OR with zero → identity
+     * x | 0 = x
+     */
+    if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0 &&
+        next->op == OP_bit_or && next->src1 == ph2_ir->dest) {
+        /* Replace the OR with an assignment */
+        next->op = OP_assign;
+        next->src1 = 0;
+        return true;
+    }
+
+    /* Pattern 4: XOR with zero → identity
+     * x ^ 0 = x
+     */
+    if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0 &&
+        next->op == OP_bit_xor && next->src1 == ph2_ir->dest) {
+        /* Replace the XOR with an assignment */
+        next->op = OP_assign;
+        next->src1 = 0;
+        return true;
+    }
+
+    /* Pattern 5: AND with zero → zero
+     * x & 0 = 0
+     */
+    if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0 &&
+        next->op == OP_bit_and &&
+        (next->src0 == ph2_ir->dest || next->src1 == ph2_ir->dest)) {
+        /* Replace the AND with a constant load of 0 */
+        next->op = OP_load_constant;
+        next->src0 = 0;
+        next->src1 = 0;
+        return true;
+    }
+
+    /* Pattern 6: OR with all ones → all ones
+     * x | 0xFFFFFFFF = 0xFFFFFFFF
+     */
+    if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == -1 &&
+        next->op == OP_bit_or &&
+        (next->src0 == ph2_ir->dest || next->src1 == ph2_ir->dest)) {
+        /* Replace the OR with a constant load of -1 */
+        next->op = OP_load_constant;
+        next->src0 = -1;
+        next->src1 = 0;
+        return true;
+    }
+
+    /* Pattern 7: Shift by zero → identity
+     * x << 0 = x, x >> 0 = x
+     */
+    if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0 &&
+        (next->op == OP_lshift || next->op == OP_rshift) &&
+        next->src1 == ph2_ir->dest) {
+        /* Replace the shift with an assignment */
+        next->op = OP_assign;
+        next->src1 = 0;
+        return true;
+    }
+
+    return false;
+}
+
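+/* Worked example (editor's illustrative sketch; registers are
+ * hypothetical): masking with all ones, as in "x & 0xFFFFFFFF", reaches
+ * this pass as
+ *     {li r2, -1; and r3, r1, r2}
+ * Pattern 2 rewrites the AND to {mov r3, r1}; the now possibly dead
+ * {li r2, -1} is left for later passes to remove.
+ */
+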
+/* Triple pattern optimization: handle 3-instruction sequences.
+ * These patterns are more complex but offer significant optimization
+ * opportunities.
+ * Returns true if an optimization was applied.
+ */
+bool triple_pattern_optimization(ph2_ir_t *ph2_ir)
+{
+    if (!ph2_ir || !ph2_ir->next || !ph2_ir->next->next)
+        return false;
+
+    ph2_ir_t *second = ph2_ir->next;
+    ph2_ir_t *third = second->next;
+
+    /* Pattern 1: Store-load-store elimination
+     * {store val1, addr; load r, addr; store val2, addr}
+     * The middle load is pointless if its result is not consumed by the
+     * following store.
+     */
+    if (ph2_ir->op == OP_store && second->op == OP_load &&
+        third->op == OP_store &&
+        ph2_ir->src1 == second->src0 && /* same address */
+        ph2_ir->dest == second->src1 && /* same offset */
+        second->src0 == third->src1 &&  /* same address */
+        second->src1 == third->dest) {  /* same offset */
+        /* Check whether the loaded value feeds the third store */
+        if (third->src0 != second->dest) {
+            /* The load result is not consumed; drop the load */
+            ph2_ir->next = third;
+            return true;
+        }
+    }
+
+    /* Pattern 2: Consecutive stores to the same location
+     * {store v1, addr; store v2, addr; store v3, addr}
+     * Only the last store matters.
+     */
+    if (ph2_ir->op == OP_store && second->op == OP_store &&
+        third->op == OP_store && ph2_ir->src1 == second->src1 &&
+        ph2_ir->dest == second->dest && second->src1 == third->src1 &&
+        second->dest == third->dest) {
+        /* All three stores write the same location; only the last one
+         * matters, so eliminate the first two.
+         */
+        ph2_ir->src0 = third->src0; /* Use the last value */
+        ph2_ir->next = third->next; /* Skip the middle stores */
+        return true;
+    }
+
+    /* FIXME: Additional patterns for future implementation:
+     *
+     * Pattern 3: Load-op-store with the same location
+     * {load r1, [addr]; op r2, r1, ...; store r2, [addr]}
+     * Could become an in-place operation where possible.
+     * Requires architecture-specific support in codegen.
+     *
+     * Pattern 4: Redundant comparison after a boolean operation
+     * {cmp a, b; load 1; load 0} → simplified when used in a branch
+     * The comparison already produces 0 or 1, so the constants may be
+     * redundant.
+     *
+     * Pattern 5: Consecutive loads that can be combined
+     * {load r1, [base+off1]; load r2, [base+off2]; op r3, r1, r2}
+     * Useful for struct member access patterns.
+     * Needs alignment checking and architecture support.
+     *
+     * Pattern 6: Load-load-select pattern
+     * {load r1, c1; load r2, c2; select/cmov based on condition}
+     * Could load only the needed value.
+     * Requires control flow analysis.
+     *
+     * Pattern 7: Add-add-add chain simplification
+     * {add r1, r0, c1; add r2, r1, c2; add r3, r2, c3}
+     * Can be simplified when all addends are constants.
+     * Requires tracking constant values through the chain.
+     *
+     * Pattern 8: Global load followed by immediate use
+     * {global_load r1; op r2, r1, ...; store r2}
+     * Track global access patterns; could become atomic operations or
+     * direct memory ops. Needs careful synchronization analysis.
+     */
+
+    return false;
+}
+
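+/* Worked example (editor's illustrative sketch; values and the address are
+ * hypothetical): for back-to-back assignments to the same variable, such
+ * as "x = a; x = b; x = c;", the allocator emits
+ *     {store v1, addr; store v2, addr; store v3, addr}
+ * Pattern 2 keeps only the final value:
+ *     {store v3, addr}
+ */
+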
 /* Main peephole optimization driver.
- * It iterates through all functions, basic blocks, and IR instructions to apply
- * local optimizations on adjacent instruction pairs.
+ *
+ * SSA optimizer (insn_t, before register allocation):
+ * - Constant folding with known values (5+3 → 8, x+0 → x)
+ * - Common subexpression elimination
+ * - Self-assignment elimination (x = x)
+ * - Dead code elimination
+ * - Constant comparison folding (5 < 3 → 0)
+ *
+ * Peephole optimizer (ph2_ir_t, after register allocation):
+ * - Register-based self-operations (r1-r1 → 0, r1^r1 → 0)
+ * - Bitwise operation optimization (SSA does not handle these)
+ * - Strength reduction for powers of 2 (needs actual constants loaded)
+ * - Load/store pattern elimination
+ * - Triple instruction sequence optimization
+ * - Architecture-specific instruction fusion
+ *
+ * This separation avoids duplicating work between the two passes while
+ * keeping coverage of the optimization opportunities.
  */
 void peephole(void)
 {
     for (func_t *func = FUNC_LIST.head; func; func = func->next) {
+        /* Local peephole optimizations on post-register-allocation IR */
         for (basic_block_t *bb = func->bbs; bb; bb = bb->rpo_next) {
             for (ph2_ir_t *ir = bb->ph2_ir_list.head; ir; ir = ir->next) {
                 ph2_ir_t *next = ir->next;
@@ -256,16 +886,47 @@ void peephole(void)
                     continue;
 
                 /* Self-assignment elimination
-                 * Removes trivial assignments where destination equals source
-                 * Pattern: {mov x, x} → eliminated
-                 * Common in compiler-generated intermediate code
+                 * Kept as a safety net: SSA handles most cases, but
+                 * register allocation might create new self-assignments.
                  */
                 if (next->op == OP_assign && next->dest == next->src0) {
                     ir->next = next->next;
                     continue;
                 }
 
-                insn_fusion(ir);
+                /* Try triple-pattern optimization first (3-instruction
+                 * sequences)
+                 */
+                if (triple_pattern_optimization(ir))
+                    continue;
+
+                /* Try instruction fusion (2-instruction sequences) */
+                if (insn_fusion(ir))
+                    continue;
+
+                /* Apply comparison optimization */
+                if (comparison_optimization(ir))
+                    continue;
+
+                /* Apply strength reduction for power-of-2 operations */
+                if (strength_reduction(ir))
+                    continue;
+
+                /* Apply algebraic simplification */
+                if (algebraic_simplification(ir))
+                    continue;
+
+                /* Apply bitwise operation optimizations */
+                if (bitwise_optimization(ir))
+                    continue;
+
+                /* Apply redundant move elimination */
+                if (redundant_move_elim(ir))
+                    continue;
+
+                /* Apply load/store elimination */
+                if (eliminate_load_store_pairs(ir))
+                    continue;
             }
         }
     }
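+
+/* End-to-end example (editor's illustrative sketch; registers are
+ * hypothetical): for "y = x / 4; z = y; z = w;" the post-allocation IR
+ * might be
+ *     {li r2, 4; div r3, r1, r2; mov r4, r3; mov r4, r5}
+ * strength_reduction turns the divide into {li r2, 2; rshift r3, r1, r2},
+ * and redundant_move_elim collapses the two moves into {mov r4, r5}.
+ */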