@@ -855,6 +855,87 @@ bool bitwise_optimization(ph2_ir_t *ph2_ir)
855
855
return false;
856
856
}
857
857
858
+ /* Triple pattern optimization: Handle 3-instruction sequences
859
+ * These patterns are more complex but offer significant optimization
860
+ * opportunities Returns true if optimization was applied
861
+ */
862
+ bool triple_pattern_optimization (ph2_ir_t * ph2_ir )
863
+ {
864
+ if (!ph2_ir || !ph2_ir -> next || !ph2_ir -> next -> next )
865
+ return false;
866
+
867
+ ph2_ir_t * second = ph2_ir -> next ;
868
+ ph2_ir_t * third = second -> next ;
869
+
870
+ /* Pattern 1: Store-load-store elimination
871
+ * {store val1, addr; load r, addr; store val2, addr}
872
+ * The middle load is pointless if not used elsewhere
873
+ */
874
+ if (ph2_ir -> op == OP_store && second -> op == OP_load &&
875
+ third -> op == OP_store &&
876
+ ph2_ir -> src1 == second -> src0 && /* same address */
877
+ ph2_ir -> dest == second -> src1 && /* same offset */
878
+ second -> src0 == third -> src1 && /* same address */
879
+ second -> src1 == third -> dest ) { /* same offset */
880
+ /* Check if the loaded value is used by the third store */
881
+ if (third -> src0 != second -> dest ) {
882
+ /* The load result is not used, can eliminate it */
883
+ ph2_ir -> next = third ;
884
+ return true;
885
+ }
886
+ }
887
+
888
+ /* Pattern 2: Consecutive stores to same location
889
+ * {store v1, addr; store v2, addr; store v3, addr}
890
+ * Only the last store matters
891
+ */
892
+ if (ph2_ir -> op == OP_store && second -> op == OP_store &&
893
+ third -> op == OP_store && ph2_ir -> src1 == second -> src1 &&
894
+ ph2_ir -> dest == second -> dest && second -> src1 == third -> src1 &&
895
+ second -> dest == third -> dest ) {
896
+ /* All three stores go to the same location */
897
+ /* Only the last one matters, eliminate first two */
898
+ ph2_ir -> src0 = third -> src0 ; /* Use last value */
899
+ ph2_ir -> next = third -> next ; /* Skip middle stores */
900
+ return true;
901
+ }
902
+
903
+ /* FIXME: Additional patterns for future implementation:
904
+ *
905
+ * Pattern 3: Load-op-store with same location
906
+ * {load r1, [addr]; op r2, r1, ...; store r2, [addr]}
907
+ * Can optimize to in-place operation if possible
908
+ * Requires architecture-specific support in codegen.
909
+ *
910
+ * Pattern 4: Redundant comparison after boolean operation
911
+ * {cmp a, b; load 1; load 0} → simplified when used in branch
912
+ * The comparison already produces 0 or 1, constants may be redundant
913
+ *
914
+ * Pattern 5: Consecutive loads that can be combined
915
+ * {load r1, [base+off1]; load r2, [base+off2]; op r3, r1, r2}
916
+ * Useful for struct member access patterns
917
+ * Needs alignment checking and architecture support.
918
+ *
919
+ * Pattern 6: Load-Load-Select pattern
920
+ * {load r1, c1; load r2, c2; select/cmov based on condition}
921
+ * Can optimize by loading only the needed value
922
+ * Requires control flow analysis.
923
+ *
924
+ * Pattern 7: Add-Add-Add chain simplification
925
+ * {add r1, r0, c1; add r2, r1, c2; add r3, r2, c3}
926
+ * Can be simplified if all are constants
927
+ * Requires tracking constant values through the chain.
928
+ *
929
+ * Pattern 8: Global load followed by immediate use
930
+ * {global_load r1; op r2, r1, ...; store r2}
931
+ * Track global access patterns
932
+ * Could optimize to atomic operations or direct memory ops.
933
+ * Needs careful synchronization analysis.
934
+ */
935
+
936
+ return false;
937
+ }
938
+
858
939
/* Main peephole optimization driver.
859
940
* It iterates through all functions, basic blocks, and IR instructions to apply
860
941
* local optimizations on adjacent instruction pairs.
0 commit comments