Skip to content

Commit 7c7d34d

Browse files
committed
Add comprehensive load/store elimination
This extends load/store elimination with more aggressive patterns, reducing memory traffic by eliminating redundant memory operations.

Local memory optimizations:
- Dead store elimination: consecutive stores to the same location
- Redundant load elimination: consecutive loads from the same location
- Store-to-load forwarding: replace a load with the stored value
- Load-store redundancy: remove a store of the just-loaded value

Global memory optimizations:
- Global dead store elimination
- Global redundant load elimination
1 parent 077113f commit 7c7d34d

File tree

1 file changed

+103
-0
lines changed

1 file changed

+103
-0
lines changed

src/peephole.c

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,105 @@ bool fold_constant_branches(func_t *func)
431431
return changed;
432432
}
433433

434+
/* Load/store elimination for consecutive memory operations.
435+
* Removes redundant loads and dead stores that access the same memory location.
436+
* Conservative implementation to maintain bootstrap stability.
437+
*/
438+
bool eliminate_load_store_pairs(ph2_ir_t *ph2_ir)
439+
{
440+
ph2_ir_t *next = ph2_ir->next;
441+
if (!next)
442+
return false;
443+
444+
/* Only handle local loads/stores for now (not globals) to be safe */
445+
446+
/* Pattern 1: Consecutive stores to same local location
447+
* {store [addr], val1; store [addr], val2} → {store [addr], val2}
448+
* First store is dead if immediately overwritten
449+
*/
450+
if (ph2_ir->op == OP_store && next->op == OP_store) {
451+
/* Check if storing to same memory location */
452+
if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1 &&
453+
ph2_ir->src0 >= 0 && ph2_ir->src1 >= 0) {
454+
/* Remove first store - it's dead */
455+
ph2_ir->dest = next->dest;
456+
ph2_ir->next = next->next;
457+
return true;
458+
}
459+
}
460+
461+
/* Pattern 2: Redundant consecutive loads from same local location
462+
* {load rd1, [addr]; load rd2, [addr]} → {load rd1, [addr]; mov rd2, rd1}
463+
* Second load can reuse the first load's result
464+
* Only apply if addresses are simple (not complex expressions)
465+
*/
466+
if (ph2_ir->op == OP_load && next->op == OP_load) {
467+
/* Check if loading from same memory location */
468+
if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1 &&
469+
ph2_ir->src0 >= 0 && ph2_ir->src1 >= 0) {
470+
/* Replace second load with move */
471+
next->op = OP_assign;
472+
next->src0 = ph2_ir->dest; /* Result of first load */
473+
next->src1 = 0;
474+
return true;
475+
}
476+
}
477+
478+
/* Pattern 3: Store followed by load from same location (store-to-load
479+
* forwarding) {store [addr], val; load rd, [addr]} → {store [addr], val;
480+
* mov rd, val} The load can use the stored value directly
481+
*/
482+
if (ph2_ir->op == OP_store && next->op == OP_load) {
483+
/* Check if accessing same memory location */
484+
if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1 &&
485+
ph2_ir->src0 >= 0 && ph2_ir->dest >= 0) {
486+
/* Replace load with move of stored value */
487+
next->op = OP_assign;
488+
next->src0 = ph2_ir->dest; /* Value that was stored */
489+
next->src1 = 0;
490+
return true;
491+
}
492+
}
493+
494+
/* Pattern 4: Load followed by redundant store of same value
495+
* {load rd, [addr]; store [addr], rd} → {load rd, [addr]}
496+
* The store is redundant if storing back the just-loaded value
497+
*/
498+
if (ph2_ir->op == OP_load && next->op == OP_store) {
499+
/* Check if storing the value we just loaded from same location */
500+
if (ph2_ir->dest == next->dest && ph2_ir->src0 == next->src0 &&
501+
ph2_ir->src1 == next->src1 && ph2_ir->src0 >= 0) {
502+
/* Remove redundant store */
503+
ph2_ir->next = next->next;
504+
return true;
505+
}
506+
}
507+
508+
/* Pattern 5: Global store/load optimizations (carefully enabled) */
509+
if (ph2_ir->op == OP_global_store && next->op == OP_global_store) {
510+
/* Consecutive global stores to same location */
511+
if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1) {
512+
/* Remove first store - it's dead */
513+
ph2_ir->dest = next->dest;
514+
ph2_ir->next = next->next;
515+
return true;
516+
}
517+
}
518+
519+
if (ph2_ir->op == OP_global_load && next->op == OP_global_load) {
520+
/* Consecutive global loads from same location */
521+
if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1) {
522+
/* Replace second load with move */
523+
next->op = OP_assign;
524+
next->src0 = ph2_ir->dest;
525+
next->src1 = 0;
526+
return true;
527+
}
528+
}
529+
530+
return false;
531+
}
532+
434533
/* Main peephole optimization driver.
435534
* It iterates through all functions, basic blocks, and IR instructions to apply
436535
* local optimizations on adjacent instruction pairs.
@@ -466,6 +565,10 @@ void peephole(void)
466565
/* Apply redundant move elimination */
467566
if (redundant_move_elim(ir))
468567
continue;
568+
569+
/* Apply load/store elimination */
570+
if (eliminate_load_store_pairs(ir))
571+
continue;
469572
}
470573
}
471574
}

0 commit comments

Comments
 (0)