@@ -320,116 +320,6 @@ bool redundant_move_elim(ph2_ir_t *ph2_ir)
320
320
return false;
321
321
}
322
322
323
- /* Simple dead instruction elimination within basic blocks.
324
- * Removes instructions whose results are never used (dead stores).
325
- * Works in conjunction with existing SSA-based DCE.
326
- */
327
- bool eliminate_dead_instructions (func_t * func )
328
- {
329
- if (!func || !func -> bbs )
330
- return false;
331
-
332
- bool changed = false;
333
-
334
- for (basic_block_t * bb = func -> bbs ; bb ; bb = bb -> rpo_next ) {
335
- ph2_ir_t * ir = bb -> ph2_ir_list .head ;
336
- while (ir && ir -> next ) {
337
- ph2_ir_t * next = ir -> next ;
338
-
339
- /* Check if next instruction immediately overwrites this one's
340
- * result */
341
- if (ir -> op == OP_load_constant && next -> op == OP_load_constant &&
342
- ir -> dest == next -> dest ) {
343
- /* Consecutive constant loads to same register - first is dead
344
- */
345
- ir -> next = next -> next ;
346
- if (next == bb -> ph2_ir_list .tail ) {
347
- bb -> ph2_ir_list .tail = ir ;
348
- }
349
- changed = true;
350
- continue ;
351
- }
352
-
353
- /* Check for dead arithmetic results */
354
- if ((ir -> op == OP_add || ir -> op == OP_sub || ir -> op == OP_mul ) &&
355
- next -> op == OP_assign && ir -> dest == next -> dest ) {
356
- /* Arithmetic result immediately overwritten by assignment */
357
- ir -> next = next -> next ;
358
- if (next == bb -> ph2_ir_list .tail ) {
359
- bb -> ph2_ir_list .tail = ir ;
360
- }
361
- changed = true;
362
- continue ;
363
- }
364
-
365
- ir = ir -> next ;
366
- }
367
- }
368
-
369
- return changed ;
370
- }
371
-
372
- /* Simple constant folding for branches after SCCP.
373
- * Converts branches with obvious constant conditions to jumps.
374
- * Very conservative to maintain bootstrap stability.
375
- */
376
- bool fold_constant_branches (func_t * func )
377
- {
378
- if (!func || !func -> bbs )
379
- return false;
380
-
381
- bool changed = false;
382
-
383
- for (basic_block_t * bb = func -> bbs ; bb ; bb = bb -> rpo_next ) {
384
- if (!bb -> ph2_ir_list .tail )
385
- continue ;
386
-
387
- ph2_ir_t * last = bb -> ph2_ir_list .tail ;
388
-
389
- /* Only handle branches */
390
- if (last -> op != OP_branch || last -> src0 < 0 )
391
- continue ;
392
-
393
- /* Look for immediately preceding constant load to the same register */
394
- ph2_ir_t * prev = bb -> ph2_ir_list .head ;
395
- ph2_ir_t * found = NULL ;
396
-
397
- /* Find the most recent constant load to the branch condition register
398
- */
399
- while (prev && prev != last ) {
400
- if (prev -> op == OP_load_constant && prev -> dest == last -> src0 ) {
401
- found = prev ;
402
- /* Keep looking - want the most recent load */
403
- }
404
- /* Stop if we see any other write to this register */
405
- else if (prev -> dest == last -> src0 ) {
406
- found = NULL ; /* Register was modified, can't fold */
407
- }
408
- prev = prev -> next ;
409
- }
410
-
411
- if (found ) {
412
- /* Found constant condition - convert branch to jump */
413
- int const_val = found -> src0 ;
414
-
415
- /* Just change the opcode, don't modify CFG edges directly */
416
- last -> op = OP_jump ;
417
-
418
- if (const_val != 0 ) {
419
- /* Always take then branch */
420
- last -> next_bb = bb -> then_ ;
421
- } else {
422
- /* Always take else branch */
423
- last -> next_bb = bb -> else_ ;
424
- }
425
-
426
- /* Don't modify src0 or CFG edges - let later passes handle it */
427
- changed = true;
428
- }
429
- }
430
-
431
- return changed ;
432
- }
433
323
434
324
/* Load/store elimination for consecutive memory operations.
435
325
* Removes redundant loads and dead stores that access the same memory location.
@@ -937,37 +827,70 @@ bool triple_pattern_optimization(ph2_ir_t *ph2_ir)
937
827
}
938
828
939
829
/* Main peephole optimization driver.
940
- * It iterates through all functions, basic blocks, and IR instructions to apply
941
- * local optimizations on adjacent instruction pairs.
830
+ *
831
+ * SSA Optimizer (insn_t, before register allocation):
832
+ * - Constant folding with known values (5+3 → 8, x+0 → x)
833
+ * - Common subexpression elimination
834
+ * - Self-assignment elimination (x = x)
835
+ * - Dead code elimination
836
+ * - Constant comparison folding (5 < 3 → 0)
837
+ *
838
+ * Peephole Optimizer (ph2_ir_t, after register allocation):
839
+ * - Register-based self-operations (r1-r1 → 0, r1^r1 → 0)
840
+ * - Bitwise operation optimization (SSA doesn't handle these)
841
+ * - Strength reduction for power-of-2 (needs actual constants loaded)
842
+ * - Load/store pattern elimination
843
+ * - Triple instruction sequence optimization
844
+ * - Architecture-specific instruction fusion
845
+ *
846
+ * This refined separation eliminates redundant optimizations while
847
+ * maintaining comprehensive coverage of optimization opportunities.
942
848
*/
943
849
void peephole (void )
944
850
{
945
851
for (func_t * func = FUNC_LIST .head ; func ; func = func -> next ) {
946
- /* Phase 1: Dead code elimination working with SCCP results */
947
- eliminate_dead_instructions (func );
948
- fold_constant_branches (func );
949
-
950
- /* Phase 2: Local peephole optimizations */
852
+ /* Local peephole optimizations on post-register-allocation IR */
951
853
for (basic_block_t * bb = func -> bbs ; bb ; bb = bb -> rpo_next ) {
952
854
for (ph2_ir_t * ir = bb -> ph2_ir_list .head ; ir ; ir = ir -> next ) {
953
855
ph2_ir_t * next = ir -> next ;
954
856
if (!next )
955
857
continue ;
956
858
957
859
/* Self-assignment elimination
958
- * Removes trivial assignments where destination equals source
959
- * Pattern: {mov x, x} → eliminated
960
- * Common in compiler-generated intermediate code
860
+ * Keep this as a safety net: SSA handles most cases, but
861
+ * register allocation might create new self-assignments
961
862
*/
962
863
if (next -> op == OP_assign && next -> dest == next -> src0 ) {
963
864
ir -> next = next -> next ;
964
865
continue ;
965
866
}
966
867
967
- /* Try instruction fusion first */
868
+ /* Try triple pattern optimization first (3-instruction
869
+ * sequences)
870
+ */
871
+ if (triple_pattern_optimization (ir ))
872
+ continue ;
873
+
874
+ /* Try instruction fusion (2-instruction sequences) */
968
875
if (insn_fusion (ir ))
969
876
continue ;
970
877
878
+ /* Apply comparison optimization */
879
+ if (comparison_optimization (ir ))
880
+ continue ;
881
+
882
+ /* Apply strength reduction for power-of-2 operations */
883
+ if (strength_reduction (ir ))
884
+ continue ;
885
+
886
+ /* Apply algebraic simplification */
887
+ if (algebraic_simplification (ir ))
888
+ continue ;
889
+
890
+ /* Apply bitwise operation optimizations */
891
+ if (bitwise_optimization (ir ))
892
+ continue ;
893
+
971
894
/* Apply redundant move elimination */
972
895
if (redundant_move_elim (ir ))
973
896
continue ;
0 commit comments