@@ -1652,20 +1652,20 @@ static void ir_add_phi_move(ir_ctx *ctx, uint32_t b, ir_ref from, ir_ref to)
16521652 }
16531653}
16541654
1655- #if defined(_WIN32 ) || defined(__APPLE__ ) || defined(__FreeBSD__ )
1656- static int ir_block_cmp (void * data , const void * b1 , const void * b2 )
1657- #else
1658- static int ir_block_cmp (const void * b1 , const void * b2 , void * data )
1659- #endif
1655+ typedef struct _ir_coalesce_block {
1656+ uint32_t b ;
1657+ uint32_t loop_depth ;
1658+ } ir_coalesce_block ;
1659+
1660+ static int ir_block_cmp (const void * b1 , const void * b2 )
16601661{
1661- ir_ctx * ctx = data ;
1662- int d1 = ctx -> cfg_blocks [* (ir_ref * )b1 ].loop_depth ;
1663- int d2 = ctx -> cfg_blocks [* (ir_ref * )b2 ].loop_depth ;
1662+ ir_coalesce_block * d1 = (ir_coalesce_block * )b1 ;
1663+ ir_coalesce_block * d2 = (ir_coalesce_block * )b2 ;
16641664
1665- if (d1 > d2 ) {
1665+ if (d1 -> loop_depth > d2 -> loop_depth ) {
16661666 return -1 ;
1667- } else if (d1 == d2 ) {
1668- if (ctx -> cfg_blocks [ * ( ir_ref * ) b1 ]. start < ctx -> cfg_blocks [ * ( ir_ref * ) b2 ]. start ) {
1667+ } else if (d1 -> loop_depth == d2 -> loop_depth ) {
1668+ if (d1 -> b < d2 -> b ) {
16691669 return -1 ;
16701670 } else {
16711671 return 1 ;
@@ -1815,49 +1815,56 @@ static int ir_try_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn)
18151815
18161816int ir_coalesce (ir_ctx * ctx )
18171817{
1818- uint32_t b , n , succ ;
1818+ uint32_t b , n , succ , pred_b , count = 0 ;
18191819 ir_ref * p , use , input , k , j ;
18201820 ir_block * bb , * succ_bb ;
18211821 ir_use_list * use_list ;
18221822 ir_insn * insn ;
1823- ir_worklist blocks ;
1823+ ir_bitset visited ;
1824+ ir_coalesce_block * list ;
18241825 bool compact = 0 ;
18251826
18261827 /* Collect a list of blocks which are predecessors to blocks with phi functions */
1827- ir_worklist_init (& blocks , ctx -> cfg_blocks_count + 1 );
1828+ list = ir_mem_malloc (sizeof (ir_coalesce_block ) * ctx -> cfg_blocks_count );
1829+ visited = ir_bitset_malloc (ctx -> cfg_blocks_count + 1 );
18281830 for (b = 1 , bb = & ctx -> cfg_blocks [1 ]; b <= ctx -> cfg_blocks_count ; b ++ , bb ++ ) {
18291831 IR_ASSERT (!(bb -> flags & IR_BB_UNREACHABLE ));
18301832 if (bb -> flags & IR_BB_HAS_PHI ) {
18311833 k = bb -> predecessors_count ;
1832- use_list = & ctx -> use_lists [bb -> start ];
1833- n = use_list -> count ;
1834- IR_ASSERT (k == ctx -> ir_base [bb -> start ].inputs_count );
1835- k ++ ;
1836- for (p = & ctx -> use_edges [use_list -> refs ]; n > 0 ; p ++ , n -- ) {
1837- use = * p ;
1838- insn = & ctx -> ir_base [use ];
1839- if (insn -> op == IR_PHI ) {
1840- for (j = 2 ; j <= k ; j ++ ) {
1841- ir_worklist_push (& blocks , ctx -> cfg_edges [bb -> predecessors + (j - 2 )]);
1834+ if (k > 1 ) {
1835+ use_list = & ctx -> use_lists [bb -> start ];
1836+ n = use_list -> count ;
1837+ IR_ASSERT (k == ctx -> ir_base [bb -> start ].inputs_count );
1838+ for (p = & ctx -> use_edges [use_list -> refs ]; n > 0 ; p ++ , n -- ) {
1839+ use = * p ;
1840+ insn = & ctx -> ir_base [use ];
1841+ if (insn -> op == IR_PHI ) {
1842+ do {
1843+ k -- ;
1844+ pred_b = ctx -> cfg_edges [bb -> predecessors + k ];
1845+ if (!ir_bitset_in (visited , pred_b )) {
1846+ ir_bitset_incl (visited , pred_b );
1847+ list [count ].b = pred_b ;
1848+ list [count ].loop_depth = ctx -> cfg_blocks [pred_b ].loop_depth ;
1849+ count ++ ;
1850+ }
1851+ } while (k > 0 );
1852+ break ;
18421853 }
18431854 }
18441855 }
18451856 }
18461857 }
1858+ ir_mem_free (visited );
18471859
1848- #ifdef _WIN32
1849- # define qsort_fn (base , num , width , func , data ) qsort_s(base, num, width, func, data)
1850- #elif defined(__APPLE__ ) || defined(__FreeBSD__ )
1851- # define qsort_fn (base , num , width , func , data ) qsort_r(base, num, width, data, func)
1852- #else
1853- # define qsort_fn (base , num , width , func , data ) qsort_r(base, num, width, func, data)
1854- #endif
1855- qsort_fn (blocks .l .a .refs , ir_worklist_len (& blocks ), sizeof (ir_ref ), ir_block_cmp , ctx );
1860+ /* Sort blocks according to their loop depth */
1861+ qsort (list , count , sizeof (ir_coalesce_block ), ir_block_cmp );
18561862
1857- while (ir_worklist_len ( & blocks ) ) {
1863+ while (count > 0 ) {
18581864 uint32_t i ;
18591865
1860- b = ir_worklist_pop (& blocks );
1866+ count -- ;
1867+ b = list [count ].b ;
18611868 bb = & ctx -> cfg_blocks [b ];
18621869 IR_ASSERT (bb -> successors_count == 1 );
18631870 succ = ctx -> cfg_edges [bb -> successors ];
@@ -1884,7 +1891,7 @@ int ir_coalesce(ir_ctx *ctx)
18841891 }
18851892 }
18861893 }
1887- ir_worklist_free ( & blocks );
1894+ ir_mem_free ( list );
18881895
18891896 ir_hint_propagation (ctx );
18901897
0 commit comments