Skip to content

Commit 2ae284a

Browse files
Konstantina Mitropoulou
authored and committed
[AMDGPU] Always emit SI_KILL_I1_PSEUDO for uniform floating point branches.
1 parent 9c8a6cd commit 2ae284a

File tree

3 files changed: +9 additions, -2 deletions

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2498,6 +2498,9 @@ def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;
24982498
def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
24992499
AssemblerPredicate<(all_of FeatureSALUFloatInsts)>;
25002500

2501+
def NotHasSALUFloatInsts : Predicate<"!Subtarget->hasSALUFloatInsts()">,
2502+
AssemblerPredicate<(all_of (not FeatureSALUFloatInsts))>;
2503+
25012504
def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">,
25022505
AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;
25032506

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1063,6 +1063,7 @@ def : GCNPat <
10631063
(SI_KILL_I1_PSEUDO SCSrc_i1:$src, -1)
10641064
>;
10651065

1066+
let SubtargetPredicate = NotHasSALUFloatInsts in
10661067
def : GCNPat <
10671068
(int_amdgcn_kill (i1 (setcc f32:$src, InlineImmFP32:$imm, cond:$cond))),
10681069
(SI_KILL_F32_COND_IMM_PSEUDO VSrc_b32:$src, (bitcast_fpimm_to_i32 $imm), (cond_as_i32imm $cond))

llvm/test/CodeGen/AMDGPU/set_kill_i1_for_floation_point_comparison.ll

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,10 @@ define amdgpu_ps void @_amdgpu_ps_main() {
1313
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
1414
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load (s32))
1515
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
16-
; CHECK-NEXT: nofpexcept S_CMP_LT_F32 [[S_BUFFER_LOAD_DWORD_IMM]], killed [[S_MOV_B32_1]], implicit-def $scc, implicit $mode
17-
; CHECK-NEXT: SI_KILL_F32_COND_IMM_PSEUDO [[S_BUFFER_LOAD_DWORD_IMM]], 0, 11, implicit-def dead $vcc, implicit $exec
16+
; CHECK-NEXT: nofpexcept S_CMP_NLT_F32 [[S_BUFFER_LOAD_DWORD_IMM]], [[S_MOV_B32_1]], implicit-def $scc, implicit $mode
17+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
18+
; CHECK-NEXT: SI_KILL_I1_PSEUDO killed [[COPY]], 0, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
19+
; CHECK-NEXT: nofpexcept S_CMP_LT_F32 [[S_BUFFER_LOAD_DWORD_IMM]], [[S_MOV_B32_1]], implicit-def $scc, implicit $mode
1820
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
1921
; CHECK-NEXT: S_BRANCH %bb.1
2022
; CHECK-NEXT: {{ $}}
@@ -53,3 +55,4 @@ declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) #2
5355
attributes #0 = { nocallback nofree nounwind }
5456
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
5557
attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
58+

0 commit comments

Comments
 (0)