From b72520020c1261180bc847438f9eeea23bcf89b4 Mon Sep 17 00:00:00 2001 From: Konstantina Mitropoulou Date: Wed, 22 Jan 2025 14:30:06 -0800 Subject: [PATCH 1/3] [NFC] Use GCNPat instead of Pat. --- llvm/lib/Target/AMDGPU/SIInstructions.td | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index aa81d9b7e22a7..8f4d74d4a2afb 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1053,39 +1053,39 @@ def : GCNPat< (SI_ELSE $src, $target) >; -def : Pat < +def : GCNPat < (int_amdgcn_kill i1:$src), (SI_KILL_I1_PSEUDO SCSrc_i1:$src, 0) >; -def : Pat < +def : GCNPat < (int_amdgcn_kill (i1 (not i1:$src))), (SI_KILL_I1_PSEUDO SCSrc_i1:$src, -1) >; -def : Pat < +def : GCNPat < (int_amdgcn_kill (i1 (setcc f32:$src, InlineImmFP32:$imm, cond:$cond))), (SI_KILL_F32_COND_IMM_PSEUDO VSrc_b32:$src, (bitcast_fpimm_to_i32 $imm), (cond_as_i32imm $cond)) >; -def : Pat < +def : GCNPat < (int_amdgcn_wqm_demote i1:$src), (SI_DEMOTE_I1 SCSrc_i1:$src, 0) >; -def : Pat < +def : GCNPat < (int_amdgcn_wqm_demote (i1 (not i1:$src))), (SI_DEMOTE_I1 SCSrc_i1:$src, -1) >; // TODO: we could add more variants for other types of conditionals -def : Pat < +def : GCNPat < (i64 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))), (COPY $src) // Return the SGPRs representing i1 src >; -def : Pat < +def : GCNPat < (i32 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))), (COPY $src) // Return the SGPRs representing i1 src >; From 3f19ceb3bcd87289d01c40e90c578bb6195fbd12 Mon Sep 17 00:00:00 2001 From: Konstantina Mitropoulou Date: Thu, 23 Jan 2025 19:46:36 -0800 Subject: [PATCH 2/3] Add a new test with SI_KILL_F32_COND_IMM_PSEUDO --- ...t_kill_i1_for_floation_point_comparison.ll | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/set_kill_i1_for_floation_point_comparison.ll diff --git 
a/llvm/test/CodeGen/AMDGPU/set_kill_i1_for_floation_point_comparison.ll b/llvm/test/CodeGen/AMDGPU/set_kill_i1_for_floation_point_comparison.ll new file mode 100644 index 0000000000000..84afbde2877f5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/set_kill_i1_for_floation_point_comparison.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=amdgpu-isel < %s 2>&1 | FileCheck %s + +define amdgpu_ps void @_amdgpu_ps_main() { + ; CHECK-LABEL: name: _amdgpu_ps_main + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3 + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; CHECK-NEXT: nofpexcept S_CMP_LT_F32 [[S_BUFFER_LOAD_DWORD_IMM]], killed [[S_MOV_B32_1]], implicit-def $scc, implicit $mode + ; CHECK-NEXT: SI_KILL_F32_COND_IMM_PSEUDO [[S_BUFFER_LOAD_DWORD_IMM]], 0, 11, implicit-def dead $vcc, implicit $exec + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.bb1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.bb2: + ; CHECK-NEXT: S_ENDPGM 0 +entry: + %i = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> zeroinitializer, i32 0, i32 0) + %i1 = bitcast i32 %i to float + %i2 = fcmp uge float %i1, 0.000000e+00 + call void @llvm.amdgcn.kill(i1 %i2) + br i1 %i2, label %bb1, label %bb2 + +bb1: ; preds = %entry + %i3 = call i64 @llvm.amdgcn.s.getpc() 
+ %i4 = and i64 %i3, 1 + %i5 = inttoptr i64 %i4 to ptr addrspace(4) + %i6 = getelementptr i8, ptr addrspace(4) %i5, i64 32 + br label %bb2 + +bb2: ; preds = %bb1, %entry + ret void +} From 26ef9ed0870b218b3020269633da3fd1cc84bbee Mon Sep 17 00:00:00 2001 From: Konstantina Mitropoulou Date: Wed, 22 Jan 2025 15:13:00 -0800 Subject: [PATCH 3/3] [AMDGPU] Always emit SI_KILL_I1_PSEUDO for uniform floating point branches. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 3 +++ llvm/lib/Target/AMDGPU/SIInstructions.td | 1 + .../AMDGPU/set_kill_i1_for_floation_point_comparison.ll | 6 ++++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 7ad6720b8001a..6439149d801f6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2498,6 +2498,9 @@ def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">; def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">, AssemblerPredicate<(all_of FeatureSALUFloatInsts)>; +def NotHasSALUFloatInsts : Predicate<"!Subtarget->hasSALUFloatInsts()">, + AssemblerPredicate<(all_of (not FeatureSALUFloatInsts))>; + def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">, AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 8f4d74d4a2afb..5af46989aca97 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1063,6 +1063,7 @@ def : GCNPat < (SI_KILL_I1_PSEUDO SCSrc_i1:$src, -1) >; +let SubtargetPredicate = NotHasSALUFloatInsts in def : GCNPat < (int_amdgcn_kill (i1 (setcc f32:$src, InlineImmFP32:$imm, cond:$cond))), (SI_KILL_F32_COND_IMM_PSEUDO VSrc_b32:$src, (bitcast_fpimm_to_i32 $imm), (cond_as_i32imm $cond)) diff --git a/llvm/test/CodeGen/AMDGPU/set_kill_i1_for_floation_point_comparison.ll 
b/llvm/test/CodeGen/AMDGPU/set_kill_i1_for_floation_point_comparison.ll index 84afbde2877f5..5f101c360f148 100644 --- a/llvm/test/CodeGen/AMDGPU/set_kill_i1_for_floation_point_comparison.ll +++ b/llvm/test/CodeGen/AMDGPU/set_kill_i1_for_floation_point_comparison.ll @@ -10,8 +10,10 @@ define amdgpu_ps void @_amdgpu_ps_main() { ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 - ; CHECK-NEXT: nofpexcept S_CMP_LT_F32 [[S_BUFFER_LOAD_DWORD_IMM]], killed [[S_MOV_B32_1]], implicit-def $scc, implicit $mode - ; CHECK-NEXT: SI_KILL_F32_COND_IMM_PSEUDO [[S_BUFFER_LOAD_DWORD_IMM]], 0, 11, implicit-def dead $vcc, implicit $exec + ; CHECK-NEXT: nofpexcept S_CMP_NLT_F32 [[S_BUFFER_LOAD_DWORD_IMM]], [[S_MOV_B32_1]], implicit-def $scc, implicit $mode + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc + ; CHECK-NEXT: SI_KILL_I1_PSEUDO killed [[COPY]], 0, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: nofpexcept S_CMP_LT_F32 [[S_BUFFER_LOAD_DWORD_IMM]], [[S_MOV_B32_1]], implicit-def $scc, implicit $mode ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}}