# | 5 |
# +---+

- --- |
- define void @favor_always_benef() {
- ret void
- }
- define void @favor_live_through_in_high_freq_region() {
- ret void
- }
- define void @use_only_region_possible() {
- ret void
- }
- ---
# Rematerializing %32 is always beneficial because the defining and using
# regions have the same frequency. It should be rematerialized first.
name: favor_always_benef
@@ -51,12 +40,12 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound(s32), implicit $exec
+ ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
@@ -126,10 +115,10 @@ body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1

- %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
- %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound(s32), implicit $exec
+ %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
+ %loop_if_bound:vgpr_32 = COPY $vgpr0
+ %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec
%loop_counter:sreg_32 = COPY %mem_data.sub1

%0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
@@ -217,12 +206,12 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound(s32), implicit $exec
+ ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
@@ -292,10 +281,10 @@ body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1

- %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
- %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound(s32), implicit $exec
+ %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
+ %loop_if_bound:vgpr_32 = COPY $vgpr0
+ %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec
%loop_counter:sreg_32 = COPY %mem_data.sub1

%0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
@@ -381,9 +370,9 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; CHECK-NEXT: %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: %loop_if_bound:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; CHECK-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
; CHECK-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
@@ -408,7 +397,7 @@ body: |
; CHECK-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
; CHECK-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode, implicit-def $m0
; CHECK-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
- ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound(s32), implicit $exec
+ ; CHECK-NEXT: %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, %loop_if_bound, implicit $exec
; CHECK-NEXT: %loop_counter:sreg_32 = COPY %mem_data.sub1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -456,10 +445,10 @@ body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1

- %mem_addr:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %loop_if_bound:vgpr_32(s32) = COPY $vgpr0
- %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr(p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound(s32), implicit $exec
+ %mem_addr:sgpr_64 = COPY $sgpr0_sgpr1
+ %loop_if_bound:vgpr_32 = COPY $vgpr0
+ %mem_data:sreg_64_xexec = S_LOAD_DWORDX2_IMM %mem_addr, 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ %exec_loop_mask:sreg_64 = V_CMP_GT_U32_e64 %mem_data.sub0, killed %loop_if_bound, implicit $exec
%loop_counter:sreg_32 = COPY %mem_data.sub1

%0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode