Skip to content

Commit 430b60e

Browse files
committed
[AMDGPU] Fix to prevent sinking of PERMLANE_SWAP instruction
The V_PERMLANE16_SWAP_B32 and V_PERMLANE32_SWAP_B32 instructions depend on the EXEC mask, so they are now marked with the isConvergent flag to prevent them from being sunk. Fixes SWDEV-537232.
1 parent d313c09 commit 430b60e

File tree

3 files changed

+67
-1
lines changed

3 files changed

+67
-1
lines changed

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -775,7 +775,8 @@ defm V_PRNG_B32 : VOP1Inst <"v_prng_b32", VOP_I32_I32, int_amdgcn_prng_b32>;
775775

776776
let Constraints = "$vdst = $vdst_in, $src0_out = $src0",
777777
DisableEncoding="$vdst_in,$src0_out",
778-
SchedRW = [Write32Bit, Write32Bit] in {
778+
SchedRW = [Write32Bit, Write32Bit],
779+
isConvergent = 1 in {
779780
let SubtargetPredicate = HasPermlane16Swap in {
780781
defm V_PERMLANE16_SWAP_B32 : VOP1Inst<"v_permlane16_swap_b32", VOP_PERMLANE_SWAP>;
781782
}
@@ -1550,8 +1551,11 @@ defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
15501551
defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
15511552

15521553
defm V_PRNG_B32 : VOP1_Real_gfx9 <0x58>;
1554+
1555+
let isConvergent = 1 in {
15531556
defm V_PERMLANE16_SWAP_B32 : VOP1_OpSel_Real_e32e64_gfx9<0x059>;
15541557
defm V_PERMLANE32_SWAP_B32 : VOP1_OpSel_Real_e32e64_gfx9<0x05a>;
1558+
}
15551559

15561560
class MovDPP8Pattern<Predicate Pred, Instruction Inst, ValueType vt> : GCNPat <
15571561
(vt (int_amdgcn_mov_dpp8 vt:$src, timm:$dpp8)),

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ class LetDummies {
1515
bit isConvertibleToThreeAddress;
1616
bit isMoveImm;
1717
bit isReMaterializable;
18+
bit isConvergent;
1819
bit isAsCheapAsAMove;
1920
bit FPDPRounding;
2021
Predicate SubtargetPredicate;

llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -733,3 +733,64 @@ body: |
733733
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vcc
734734
S_ENDPGM 0
735735
...
736+
---
737+
name: test_no_sink_permlane_swap
738+
tracksRegLiveness: true
739+
machineFunctionInfo:
740+
isEntryFunction: true
741+
body: |
742+
; GFX9-LABEL: name: test_no_sink_permlane_swap
743+
; GFX9: bb.0:
744+
; GFX9-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
745+
; GFX9-NEXT: liveins: $vgpr0
746+
; GFX9-NEXT: {{ $}}
747+
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
748+
; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
749+
; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
750+
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]]
751+
; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
752+
; GFX9-NEXT: [[V_PERMLANE32_SWAP_B32_e64_:%[0-9]+]]:vgpr_32, [[V_PERMLANE32_SWAP_B32_e64_1:%[0-9]+]]:vgpr_32 = V_PERMLANE32_SWAP_B32_e64 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], 0, 0, implicit $exec
753+
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
754+
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
755+
; GFX9-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[COPY2]](s32), [[S_MOV_B32_]], implicit $exec
756+
; GFX9-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF [[V_CMP_LT_I32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
757+
; GFX9-NEXT: S_BRANCH %bb.1
758+
; GFX9-NEXT: {{ $}}
759+
; GFX9-NEXT: bb.1:
760+
; GFX9-NEXT: successors: %bb.2(0x80000000)
761+
; GFX9-NEXT: {{ $}}
762+
; GFX9-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_PERMLANE32_SWAP_B32_e64_]], [[V_PERMLANE32_SWAP_B32_e64_1]], implicit $exec
763+
; GFX9-NEXT: {{ $}}
764+
; GFX9-NEXT: bb.2:
765+
; GFX9-NEXT: successors: %bb.3(0x80000000)
766+
; GFX9-NEXT: {{ $}}
767+
; GFX9-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B32_e32_]], %bb.0, [[V_MAX_I32_e64_]], %bb.1
768+
; GFX9-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
769+
; GFX9-NEXT: {{ $}}
770+
; GFX9-NEXT: bb.3:
771+
; GFX9-NEXT: S_ENDPGM 0, implicit [[PHI]]
772+
bb.0:
773+
liveins: $vgpr0
774+
%1:vgpr_32 = COPY $vgpr0
775+
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
776+
%5:sreg_64 = S_MOV_B64 0
777+
%7:vreg_64 = COPY %5
778+
%9:vgpr_32 = GLOBAL_LOAD_DWORD killed %7, 0, 0, implicit $exec :: (load (s32), addrspace 1)
779+
%10:vgpr_32, %11:vgpr_32 = V_PERMLANE32_SWAP_B32_e64 %9:vgpr_32, %9:vgpr_32, 0, 0, implicit $exec
780+
%15:vgpr_32(s32) = COPY $vgpr0
781+
%16:sreg_32 = S_MOV_B32 1
782+
%17:sreg_64 = V_CMP_LT_I32_e64 %15(s32), %16, implicit $exec
783+
%18:sreg_64 = COPY %17
784+
%19:sreg_64 = SI_IF %18, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
785+
S_BRANCH %bb.1
786+
787+
bb.1:
788+
%20:vgpr_32 = V_MAX_I32_e64 %10:vgpr_32, %11:vgpr_32, implicit $exec
789+
790+
bb.2:
791+
%22:vgpr_32 = PHI %3, %bb.0, %20, %bb.1
792+
SI_END_CF %19, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
793+
794+
bb.3:
795+
S_ENDPGM 0, implicit %22
796+
...

0 commit comments

Comments
 (0)