@@ -431,6 +431,105 @@ bool fold_constant_branches(func_t *func)
431
431
return changed ;
432
432
}
433
433
434
+ /* Load/store elimination for consecutive memory operations.
435
+ * Removes redundant loads and dead stores that access the same memory location.
436
+ * Conservative implementation to maintain bootstrap stability.
437
+ */
438
+ bool eliminate_load_store_pairs (ph2_ir_t * ph2_ir )
439
+ {
440
+ ph2_ir_t * next = ph2_ir -> next ;
441
+ if (!next )
442
+ return false;
443
+
444
+ /* Only handle local loads/stores for now (not globals) to be safe */
445
+
446
+ /* Pattern 1: Consecutive stores to same local location
447
+ * {store [addr], val1; store [addr], val2} → {store [addr], val2}
448
+ * First store is dead if immediately overwritten
449
+ */
450
+ if (ph2_ir -> op == OP_store && next -> op == OP_store ) {
451
+ /* Check if storing to same memory location */
452
+ if (ph2_ir -> src0 == next -> src0 && ph2_ir -> src1 == next -> src1 &&
453
+ ph2_ir -> src0 >= 0 && ph2_ir -> src1 >= 0 ) {
454
+ /* Remove first store - it's dead */
455
+ ph2_ir -> dest = next -> dest ;
456
+ ph2_ir -> next = next -> next ;
457
+ return true;
458
+ }
459
+ }
460
+
461
+ /* Pattern 2: Redundant consecutive loads from same local location
462
+ * {load rd1, [addr]; load rd2, [addr]} → {load rd1, [addr]; mov rd2, rd1}
463
+ * Second load can reuse the first load's result
464
+ * Only apply if addresses are simple (not complex expressions)
465
+ */
466
+ if (ph2_ir -> op == OP_load && next -> op == OP_load ) {
467
+ /* Check if loading from same memory location */
468
+ if (ph2_ir -> src0 == next -> src0 && ph2_ir -> src1 == next -> src1 &&
469
+ ph2_ir -> src0 >= 0 && ph2_ir -> src1 >= 0 ) {
470
+ /* Replace second load with move */
471
+ next -> op = OP_assign ;
472
+ next -> src0 = ph2_ir -> dest ; /* Result of first load */
473
+ next -> src1 = 0 ;
474
+ return true;
475
+ }
476
+ }
477
+
478
+ /* Pattern 3: Store followed by load from same location (store-to-load
479
+ * forwarding) {store [addr], val; load rd, [addr]} → {store [addr], val;
480
+ * mov rd, val} The load can use the stored value directly
481
+ */
482
+ if (ph2_ir -> op == OP_store && next -> op == OP_load ) {
483
+ /* Check if accessing same memory location */
484
+ if (ph2_ir -> src0 == next -> src0 && ph2_ir -> src1 == next -> src1 &&
485
+ ph2_ir -> src0 >= 0 && ph2_ir -> dest >= 0 ) {
486
+ /* Replace load with move of stored value */
487
+ next -> op = OP_assign ;
488
+ next -> src0 = ph2_ir -> dest ; /* Value that was stored */
489
+ next -> src1 = 0 ;
490
+ return true;
491
+ }
492
+ }
493
+
494
+ /* Pattern 4: Load followed by redundant store of same value
495
+ * {load rd, [addr]; store [addr], rd} → {load rd, [addr]}
496
+ * The store is redundant if storing back the just-loaded value
497
+ */
498
+ if (ph2_ir -> op == OP_load && next -> op == OP_store ) {
499
+ /* Check if storing the value we just loaded from same location */
500
+ if (ph2_ir -> dest == next -> dest && ph2_ir -> src0 == next -> src0 &&
501
+ ph2_ir -> src1 == next -> src1 && ph2_ir -> src0 >= 0 ) {
502
+ /* Remove redundant store */
503
+ ph2_ir -> next = next -> next ;
504
+ return true;
505
+ }
506
+ }
507
+
508
+ /* Pattern 5: Global store/load optimizations (carefully enabled) */
509
+ if (ph2_ir -> op == OP_global_store && next -> op == OP_global_store ) {
510
+ /* Consecutive global stores to same location */
511
+ if (ph2_ir -> src0 == next -> src0 && ph2_ir -> src1 == next -> src1 ) {
512
+ /* Remove first store - it's dead */
513
+ ph2_ir -> dest = next -> dest ;
514
+ ph2_ir -> next = next -> next ;
515
+ return true;
516
+ }
517
+ }
518
+
519
+ if (ph2_ir -> op == OP_global_load && next -> op == OP_global_load ) {
520
+ /* Consecutive global loads from same location */
521
+ if (ph2_ir -> src0 == next -> src0 && ph2_ir -> src1 == next -> src1 ) {
522
+ /* Replace second load with move */
523
+ next -> op = OP_assign ;
524
+ next -> src0 = ph2_ir -> dest ;
525
+ next -> src1 = 0 ;
526
+ return true;
527
+ }
528
+ }
529
+
530
+ return false;
531
+ }
532
+
434
533
/* Main peephole optimization driver.
435
534
* It iterates through all functions, basic blocks, and IR instructions to apply
436
535
* local optimizations on adjacent instruction pairs.
@@ -466,6 +565,10 @@ void peephole(void)
466
565
/* Apply redundant move elimination */
467
566
if (redundant_move_elim (ir ))
468
567
continue ;
568
+
569
+ /* Apply load/store elimination */
570
+ if (eliminate_load_store_pairs (ir ))
571
+ continue ;
469
572
}
470
573
}
471
574
}
0 commit comments