@@ -431,6 +431,105 @@ bool fold_constant_branches(func_t *func)
431431 return changed ;
432432}
433433
434+ /* Load/store elimination for consecutive memory operations.
435+ * Removes redundant loads and dead stores that access the same memory location.
436+ * Conservative implementation to maintain bootstrap stability.
437+ */
438+ bool eliminate_load_store_pairs (ph2_ir_t * ph2_ir )
439+ {
440+ ph2_ir_t * next = ph2_ir -> next ;
441+ if (!next )
442+ return false;
443+
444+ /* Only handle local loads/stores for now (not globals) to be safe */
445+
446+ /* Pattern 1: Consecutive stores to same local location
447+ * {store [addr], val1; store [addr], val2} → {store [addr], val2}
448+ * First store is dead if immediately overwritten
449+ */
450+ if (ph2_ir -> op == OP_store && next -> op == OP_store ) {
451+ /* Check if storing to same memory location */
452+ if (ph2_ir -> src0 == next -> src0 && ph2_ir -> src1 == next -> src1 &&
453+ ph2_ir -> src0 >= 0 && ph2_ir -> src1 >= 0 ) {
454+ /* Remove first store - it's dead */
455+ ph2_ir -> dest = next -> dest ;
456+ ph2_ir -> next = next -> next ;
457+ return true;
458+ }
459+ }
460+
461+ /* Pattern 2: Redundant consecutive loads from same local location
462+ * {load rd1, [addr]; load rd2, [addr]} → {load rd1, [addr]; mov rd2, rd1}
463+ * Second load can reuse the first load's result
464+ * Only apply if addresses are simple (not complex expressions)
465+ */
466+ if (ph2_ir -> op == OP_load && next -> op == OP_load ) {
467+ /* Check if loading from same memory location */
468+ if (ph2_ir -> src0 == next -> src0 && ph2_ir -> src1 == next -> src1 &&
469+ ph2_ir -> src0 >= 0 && ph2_ir -> src1 >= 0 ) {
470+ /* Replace second load with move */
471+ next -> op = OP_assign ;
472+ next -> src0 = ph2_ir -> dest ; /* Result of first load */
473+ next -> src1 = 0 ;
474+ return true;
475+ }
476+ }
477+
478+ /* Pattern 3: Store followed by load from same location (store-to-load
479+ * forwarding) {store [addr], val; load rd, [addr]} → {store [addr], val;
480+ * mov rd, val} The load can use the stored value directly
481+ */
482+ if (ph2_ir -> op == OP_store && next -> op == OP_load ) {
483+ /* Check if accessing same memory location */
484+ if (ph2_ir -> src0 == next -> src0 && ph2_ir -> src1 == next -> src1 &&
485+ ph2_ir -> src0 >= 0 && ph2_ir -> dest >= 0 ) {
486+ /* Replace load with move of stored value */
487+ next -> op = OP_assign ;
488+ next -> src0 = ph2_ir -> dest ; /* Value that was stored */
489+ next -> src1 = 0 ;
490+ return true;
491+ }
492+ }
493+
494+ /* Pattern 4: Load followed by redundant store of same value
495+ * {load rd, [addr]; store [addr], rd} → {load rd, [addr]}
496+ * The store is redundant if storing back the just-loaded value
497+ */
498+ if (ph2_ir -> op == OP_load && next -> op == OP_store ) {
499+ /* Check if storing the value we just loaded from same location */
500+ if (ph2_ir -> dest == next -> dest && ph2_ir -> src0 == next -> src0 &&
501+ ph2_ir -> src1 == next -> src1 && ph2_ir -> src0 >= 0 ) {
502+ /* Remove redundant store */
503+ ph2_ir -> next = next -> next ;
504+ return true;
505+ }
506+ }
507+
508+ /* Pattern 5: Global store/load optimizations (carefully enabled) */
509+ if (ph2_ir -> op == OP_global_store && next -> op == OP_global_store ) {
510+ /* Consecutive global stores to same location */
511+ if (ph2_ir -> src0 == next -> src0 && ph2_ir -> src1 == next -> src1 ) {
512+ /* Remove first store - it's dead */
513+ ph2_ir -> dest = next -> dest ;
514+ ph2_ir -> next = next -> next ;
515+ return true;
516+ }
517+ }
518+
519+ if (ph2_ir -> op == OP_global_load && next -> op == OP_global_load ) {
520+ /* Consecutive global loads from same location */
521+ if (ph2_ir -> src0 == next -> src0 && ph2_ir -> src1 == next -> src1 ) {
522+ /* Replace second load with move */
523+ next -> op = OP_assign ;
524+ next -> src0 = ph2_ir -> dest ;
525+ next -> src1 = 0 ;
526+ return true;
527+ }
528+ }
529+
530+ return false;
531+ }
532+
434533/* Main peephole optimization driver.
435534 * It iterates through all functions, basic blocks, and IR instructions to apply
436535 * local optimizations on adjacent instruction pairs.
@@ -466,6 +565,10 @@ void peephole(void)
466565 /* Apply redundant move elimination */
467566 if (redundant_move_elim (ir ))
468567 continue ;
568+
569+ /* Apply load/store elimination */
570+ if (eliminate_load_store_pairs (ir ))
571+ continue ;
469572 }
470573 }
471574 }
0 commit comments