@@ -120,17 +120,10 @@ stack:
120120 - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
121121body : |
122122 bb.0:
123- ; GFX8-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_phys_vgpr
124- ; GFX8: $vgpr0 = V_ADD_CO_U32_e32 128, %stack.0, implicit-def dead $vcc, implicit $exec
125- ; GFX8-NEXT: SI_RETURN implicit $vgpr0
126- ;
127- ; GFX9-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_phys_vgpr
128- ; GFX9: $vgpr0 = V_ADD_U32_e32 128, %stack.0, implicit $exec
129- ; GFX9-NEXT: SI_RETURN implicit $vgpr0
130- ;
131- ; GFX10-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_phys_vgpr
132- ; GFX10: $vgpr0 = V_ADD_U32_e32 128, %stack.0, implicit $exec
133- ; GFX10-NEXT: SI_RETURN implicit $vgpr0
123+ ; CHECK-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_phys_vgpr
124+ ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 128, implicit-def dead $scc
125+ ; CHECK-NEXT: $vgpr0 = COPY [[S_ADD_I32_]]
126+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
134127 %0:sreg_32 = S_MOV_B32 %stack.0
135128 %1:sreg_32 = S_ADD_I32 %0, 128, implicit-def dead $scc
136129 $vgpr0 = COPY %1
@@ -535,3 +528,68 @@ body: |
535528 %2:vgpr_32 = COPY %1
536529 SI_RETURN implicit %2
537530 ...
531+
532+ # The frame index %stack.0 is expected to fold into the S_ADD_I32 in bb.0, while the physreg copy of %2 to $vgpr0 inside the SI_WATERFALL_LOOP block (bb.1), which feeds SI_CALL as an implicit use, should not be erased. NOTE(review): the test name says s_or_b32 but the body uses S_ADD_I32 - confirm the name is intentional.
533+ ---
534+ name : fold_fi_into_s_or_b32_user_is_physreg_copy
535+ tracksRegLiveness : true
536+ stack :
537+ - { id: 0, size: 16, alignment: 16 }
538+ machineFunctionInfo :
539+ scratchRSrcReg : ' $sgpr0_sgpr1_sgpr2_sgpr3'
540+ frameOffsetReg : ' $sgpr33'
541+ stackPtrOffsetReg : ' $sgpr32'
542+ body : |
543+ ; CHECK-LABEL: name: fold_fi_into_s_or_b32_user_is_physreg_copy
544+ ; CHECK: bb.0:
545+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
546+ ; CHECK-NEXT: liveins: $vgpr0_vgpr1
547+ ; CHECK-NEXT: {{ $}}
548+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
549+ ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
550+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
551+ ; CHECK-NEXT: {{ $}}
552+ ; CHECK-NEXT: bb.1:
553+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
554+ ; CHECK-NEXT: {{ $}}
555+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]].sub0, implicit $exec
556+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]].sub1, implicit $exec
557+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
558+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE]], [[COPY]], implicit $exec
559+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U64_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
560+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
561+ ; CHECK-NEXT: $vgpr0 = COPY [[S_ADD_I32_]]
562+ ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[REG_SEQUENCE]], 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0
563+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
564+ ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
565+ ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
566+ ; CHECK-NEXT: {{ $}}
567+ ; CHECK-NEXT: bb.2:
568+ ; CHECK-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
569+ ; CHECK-NEXT: SI_RETURN
570+ bb.0:
571+ liveins: $vgpr0_vgpr1
572+
573+ %0:vreg_64 = COPY $vgpr0_vgpr1
574+ %1:sreg_32 = S_MOV_B32 %stack.0
575+ %2:sreg_32 = S_ADD_I32 killed %1, 4, implicit-def dead $scc
576+ %3:sreg_64_xexec = S_MOV_B64 $exec
577+
578+ bb.1:
579+ %4:sgpr_32 = V_READFIRSTLANE_B32 %0.sub0, implicit $exec
580+ %5:sgpr_32 = V_READFIRSTLANE_B32 %0.sub1, implicit $exec
581+ %6:sgpr_64 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
582+ %7:sreg_64_xexec = V_CMP_EQ_U64_e64 %6, %0, implicit $exec
583+ %8:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed %7, implicit-def $exec, implicit-def $scc, implicit $exec
584+ ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
585+ $vgpr0 = COPY %2
586+ $sgpr30_sgpr31 = SI_CALL %6, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0
587+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
588+ $exec = S_XOR_B64_term $exec, %8, implicit-def $scc
589+ SI_WATERFALL_LOOP %bb.1, implicit $exec
590+
591+ bb.2:
592+ $exec = S_MOV_B64 %3
593+ SI_RETURN
594+
595+ ...
0 commit comments