Skip to content

Commit 6fd8c15

Browse files
Konstantina Mitropoulou
authored and committed
[AMDGPU] Always emit SI_KILL_I1_PSEUDO for uniform floating point branches.
1 parent 9c8a6cd commit 6fd8c15

File tree

3 files changed

+29
-20
lines changed

3 files changed

+29
-20
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2498,6 +2498,9 @@ def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;
24982498
def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
24992499
AssemblerPredicate<(all_of FeatureSALUFloatInsts)>;
25002500

2501+
def NotHasSALUFloatInsts : Predicate<"!Subtarget->hasSALUFloatInsts()">,
2502+
AssemblerPredicate<(all_of (not FeatureSALUFloatInsts))>;
2503+
25012504
def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">,
25022505
AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;
25032506

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1063,6 +1063,7 @@ def : GCNPat <
10631063
(SI_KILL_I1_PSEUDO SCSrc_i1:$src, -1)
10641064
>;
10651065

1066+
let SubtargetPredicate = NotHasSALUFloatInsts in
10661067
def : GCNPat <
10671068
(int_amdgcn_kill (i1 (setcc f32:$src, InlineImmFP32:$imm, cond:$cond))),
10681069
(SI_KILL_F32_COND_IMM_PSEUDO VSrc_b32:$src, (bitcast_fpimm_to_i32 $imm), (cond_as_i32imm $cond))

llvm/test/CodeGen/AMDGPU/set_kill_i1_for_floation_point_comparison.ll

Lines changed: 25 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,28 +1,33 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=amdgpu-isel < %s 2>&1 | FileCheck %s
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s 2>&1 | FileCheck %s
33

44
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
55
target triple = "amdgcn--amdpal"
66

77
define amdgpu_ps void @_amdgpu_ps_main() {
8-
; CHECK-LABEL: name: _amdgpu_ps_main
9-
; CHECK: bb.0..entry:
10-
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
11-
; CHECK-NEXT: {{ $}}
12-
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
13-
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
14-
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load (s32))
15-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
16-
; CHECK-NEXT: nofpexcept S_CMP_LT_F32 [[S_BUFFER_LOAD_DWORD_IMM]], killed [[S_MOV_B32_1]], implicit-def $scc, implicit $mode
17-
; CHECK-NEXT: SI_KILL_F32_COND_IMM_PSEUDO [[S_BUFFER_LOAD_DWORD_IMM]], 0, 11, implicit-def dead $vcc, implicit $exec
18-
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
19-
; CHECK-NEXT: S_BRANCH %bb.1
20-
; CHECK-NEXT: {{ $}}
21-
; CHECK-NEXT: bb.1 (%ir-block.3):
22-
; CHECK-NEXT: successors: %bb.2(0x80000000)
23-
; CHECK-NEXT: {{ $}}
24-
; CHECK-NEXT: bb.2 (%ir-block.5):
25-
; CHECK-NEXT: S_ENDPGM 0
8+
; CHECK-LABEL: _amdgpu_ps_main:
9+
; CHECK: ; %bb.0: ; %.entry
10+
; CHECK-NEXT: s_mov_b32 s0, 0
11+
; CHECK-NEXT: s_mov_b32 s4, exec_lo
12+
; CHECK-NEXT: s_mov_b32 s1, s0
13+
; CHECK-NEXT: s_mov_b32 s2, s0
14+
; CHECK-NEXT: s_mov_b32 s3, s0
15+
; CHECK-NEXT: s_buffer_load_b32 s0, s[0:3], 0x0
16+
; CHECK-NEXT: s_wait_kmcnt 0x0
17+
; CHECK-NEXT: s_cmp_nlt_f32 s0, 0
18+
; CHECK-NEXT: s_cselect_b32 s1, -1, 0
19+
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
20+
; CHECK-NEXT: s_and_not1_b32 s1, exec_lo, s1
21+
; CHECK-NEXT: s_and_not1_b32 s4, s4, s1
22+
; CHECK-NEXT: s_cbranch_scc0 .LBB0_2
23+
; CHECK-NEXT: ; %bb.1: ; %.entry
24+
; CHECK-NEXT: s_and_b32 exec_lo, exec_lo, s4
25+
; CHECK-NEXT: s_cmp_lt_f32 s0, 0
26+
; CHECK-NEXT: s_endpgm
27+
; CHECK-NEXT: .LBB0_2:
28+
; CHECK-NEXT: s_mov_b32 exec_lo, 0
29+
; CHECK-NEXT: export mrt0 off, off, off, off done
30+
; CHECK-NEXT: s_endpgm
2631
.entry:
2732
%0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> zeroinitializer, i32 0, i32 0)
2833
%1 = bitcast i32 %0 to float

0 commit comments

Comments
 (0)