3 changes: 2 additions & 1 deletion llvm/docs/AMDGPUUsage.rst
@@ -1385,7 +1385,8 @@ The AMDGPU backend implements the following LLVM IR intrinsics.

 llvm.amdgcn.sched.barrier Controls the types of instructions that may be allowed to cross the intrinsic
 during instruction scheduling. The parameter is a mask for the instruction types
-that can cross the intrinsic.
+that can cross the intrinsic. When bits for specific instruction types are set,
+their more general counterpart (all ALU or all VMEM) is ignored.
 
 - 0x0000: No instructions may be scheduled across sched_barrier.
 - 0x0001: All, non-memory, non-side-effect producing instructions may be
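As an illustration of the new rule from the source side, here is a minimal sketch. The helper name salu_only_barrier is made up, and the clang builtin __builtin_amdgcn_sched_barrier (which lowers to this intrinsic) is used on the assumption that it is available; the hex constants are the mask bits listed above.

// Illustrative sketch only; the helper name is hypothetical and the builtin
// __builtin_amdgcn_sched_barrier is assumed to be available in clang.
static inline void salu_only_barrier() {
  // 0x0001 | 0x0004 == ALU | SALU. With the rule above, the specific SALU bit
  // is honored and the general ALU bit is ignored, so only SALU instructions
  // may be scheduled across this point.
  __builtin_amdgcn_sched_barrier(0x0001 | 0x0004);
}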
17 changes: 15 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -2619,8 +2619,14 @@ IGroupLPDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const {
   // allowed past the SCHED_BARRIER.
   SchedGroupMask InvertedMask = ~Mask;
 
+  // When given, specific bits overrule the more general ALU type.
+  bool HasConcreteALUClassSpecified =
+      (Mask & (SchedGroupMask::SALU | SchedGroupMask::VALU |
+               SchedGroupMask::MFMA)) != SchedGroupMask::NONE;
+
   // ALU implies VALU, SALU, MFMA, TRANS.
-  if ((InvertedMask & SchedGroupMask::ALU) == SchedGroupMask::NONE)
+  if (!HasConcreteALUClassSpecified &&
+      (InvertedMask & SchedGroupMask::ALU) == SchedGroupMask::NONE)
     InvertedMask &= ~SchedGroupMask::VALU & ~SchedGroupMask::SALU &
                     ~SchedGroupMask::MFMA & ~SchedGroupMask::TRANS;
   // VALU, SALU, MFMA, TRANS implies ALU.
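To make the effect of the new guard concrete, here is a standalone sketch of just the ALU half of the inversion, using plain unsigned bit math instead of the real SchedGroupMask type. The function name invertAluPart is made up for this sketch, and the bit values are the ones documented in AMDGPUUsage.rst; it is not the pass itself.

// Standalone sketch (not the real pass): the ALU half of the mask inversion.
#include <cassert>
#include <cstdint>

namespace {
constexpr uint32_t ALU = 0x001, VALU = 0x002, SALU = 0x004, MFMA = 0x008,
                   TRANS = 0x400, ALL = 0x7FF;

uint32_t invertAluPart(uint32_t Mask) {
  uint32_t Inverted = ~Mask & ALL;
  // New guard: a concrete ALU class in the original mask wins over plain ALU.
  bool HasConcreteALUClass = (Mask & (SALU | VALU | MFMA)) != 0;
  if (!HasConcreteALUClass && (Inverted & ALU) == 0)
    Inverted &= ~(VALU | SALU | MFMA | TRANS); // ALU implies its subtypes.
  else if ((Inverted & VALU) == 0 || (Inverted & SALU) == 0 ||
           (Inverted & MFMA) == 0 || (Inverted & TRANS) == 0)
    Inverted &= ~ALU; // An allowed ALU subtype also allows generic ALU.
  return Inverted;
}
} // namespace

int main() {
  // Mask 0x1 (ALU only): every ALU class may cross, so the blocked group is
  // just the memory classes, 0x3F0 == 1008 (see sched_barrier_m1 below).
  assert(invertAluPart(0x1) == 1008);
  // Mask 0x3 (ALU | VALU): the concrete VALU bit now overrides ALU, so
  // SALU/MFMA/TRANS stay blocked: 0x7FC == 2044 (see sched_barrier_m3 below).
  // Without the new guard this would also have collapsed to 1008.
  assert(invertAluPart(0x3) == 2044);
  return 0;
}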
@@ -2630,8 +2636,15 @@ IGroupLPDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const {
       (InvertedMask & SchedGroupMask::TRANS) == SchedGroupMask::NONE)
     InvertedMask &= ~SchedGroupMask::ALU;
 
+  // When given, specific bits overrule the more general MEM type.
+  bool HasConcreteMemClassSpecified =
+      (Mask & (SchedGroupMask::VMEM_READ | SchedGroupMask::VMEM_WRITE |
+               SchedGroupMask::DS_READ | SchedGroupMask::DS_WRITE)) !=
+      SchedGroupMask::NONE;
+
   // VMEM implies VMEM_READ, VMEM_WRITE.
-  if ((InvertedMask & SchedGroupMask::VMEM) == SchedGroupMask::NONE)
+  if (!HasConcreteMemClassSpecified &&
+      (InvertedMask & SchedGroupMask::VMEM) == SchedGroupMask::NONE)
     InvertedMask &= ~SchedGroupMask::VMEM_READ & ~SchedGroupMask::VMEM_WRITE;
   // VMEM_READ, VMEM_WRITE implies VMEM.
   else if ((InvertedMask & SchedGroupMask::VMEM_READ) == SchedGroupMask::NONE ||
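The same kind of sketch for the VMEM half of this hunk, again with a made-up helper name (invertVmemPart) and plain unsigned math; the DS handling that follows in the real function is omitted, so this only models the rules shown above.

// Standalone sketch (not the real pass): the VMEM half of the mask inversion.
#include <cassert>
#include <cstdint>

namespace {
constexpr uint32_t VMEM = 0x010, VMEM_READ = 0x020, VMEM_WRITE = 0x040,
                   DS_READ = 0x100, DS_WRITE = 0x200, ALL = 0x7FF;

uint32_t invertVmemPart(uint32_t Mask) {
  uint32_t Inverted = ~Mask & ALL;
  // New guard: a concrete memory class in the original mask wins over VMEM.
  bool HasConcreteMemClass =
      (Mask & (VMEM_READ | VMEM_WRITE | DS_READ | DS_WRITE)) != 0;
  if (!HasConcreteMemClass && (Inverted & VMEM) == 0)
    Inverted &= ~(VMEM_READ | VMEM_WRITE); // VMEM implies both access kinds.
  else if ((Inverted & VMEM_READ) == 0 || (Inverted & VMEM_WRITE) == 0)
    Inverted &= ~VMEM; // An allowed access kind also allows generic VMEM.
  return Inverted;
}
} // namespace

int main() {
  // Mask 0x30 (VMEM | VMEM_READ): the concrete VMEM_READ bit wins, so
  // VMEM_WRITE stays in the blocked group. The old code cleared it as well,
  // treating the mask exactly like plain VMEM (0x10).
  assert((invertVmemPart(0x30) & VMEM_WRITE) != 0);
  assert((invertVmemPart(0x10) & VMEM_WRITE) == 0);
  return 0;
}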
175 changes: 175 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.barrier.alu-bit.ll
@@ -0,0 +1,175 @@
; REQUIRES: asserts
; RUN: llc -mtriple=amdgcn -debug-only=igrouplp < %s 2>&1 | FileCheck -check-prefix=GCN %s

define protected amdgpu_kernel void @sched_barrier_m0(ptr addrspace(3) noalias %ind, ptr addrspace(3) noalias %outd) #0 {
;
; Set mask to 0 (no bits set)
;
; GCN: Applying IGroupLPDAGMutation...
; GCN-NEXT: Applying IGroupLPDAGMutation...
entry:
%arrayidx = getelementptr inbounds float, ptr addrspace(3) %ind, i64 0
%0 = load float, ptr addrspace(3) %arrayidx, align 4
call void @llvm.amdgcn.sched.barrier(i32 0) #1
%add = fadd contract float %0, 1.000000e+00
%arrayidx3 = getelementptr inbounds float, ptr addrspace(3) %outd, i64 0
store float %add, ptr addrspace(3) %arrayidx3, align 4
ret void
}

define protected amdgpu_kernel void @sched_barrier_m1(ptr addrspace(3) noalias %ind, ptr addrspace(3) noalias %outd) #0 {
;
; Set mask to 1 (ALU Bit, implies all *-ALU bits)
;
; GCN: Applying IGroupLPDAGMutation...
; GCN-NEXT: Building SchedGroup for SchedBarrier with Mask: 1
; GCN-NEXT: After Inverting, SchedGroup Mask: 1008
entry:
%arrayidx = getelementptr inbounds float, ptr addrspace(3) %ind, i64 0
%0 = load float, ptr addrspace(3) %arrayidx, align 4
call void @llvm.amdgcn.sched.barrier(i32 1) #1
%add = fadd contract float %0, 1.000000e+00
%arrayidx3 = getelementptr inbounds float, ptr addrspace(3) %outd, i64 0
store float %add, ptr addrspace(3) %arrayidx3, align 4
ret void
}

define protected amdgpu_kernel void @sched_barrier_m2(ptr addrspace(3) noalias %ind, ptr addrspace(3) noalias %outd) #0 {
;
; Set mask to 2 (VALU Bit)
;
; GCN: Applying IGroupLPDAGMutation...
; GCN-NEXT: Building SchedGroup for SchedBarrier with Mask: 2
; GCN-NEXT: After Inverting, SchedGroup Mask: 2044
entry:
%arrayidx = getelementptr inbounds float, ptr addrspace(3) %ind, i64 0
%0 = load float, ptr addrspace(3) %arrayidx, align 4
call void @llvm.amdgcn.sched.barrier(i32 2) #1
%add = fadd contract float %0, 1.000000e+00
%arrayidx3 = getelementptr inbounds float, ptr addrspace(3) %outd, i64 0
store float %add, ptr addrspace(3) %arrayidx3, align 4
ret void
}

define protected amdgpu_kernel void @sched_barrier_m4(ptr addrspace(3) noalias %ind, ptr addrspace(3) noalias %outd) #0 {
;
; Set mask to 4 (SALU Bit)
;
; GCN: Applying IGroupLPDAGMutation...
; GCN-NEXT: Building SchedGroup for SchedBarrier with Mask: 4
; GCN-NEXT: After Inverting, SchedGroup Mask: 2042
entry:
%arrayidx = getelementptr inbounds float, ptr addrspace(3) %ind, i64 0
%0 = load float, ptr addrspace(3) %arrayidx, align 4
call void @llvm.amdgcn.sched.barrier(i32 4) #1
%add = fadd contract float %0, 1.000000e+00
%arrayidx3 = getelementptr inbounds float, ptr addrspace(3) %outd, i64 0
store float %add, ptr addrspace(3) %arrayidx3, align 4
ret void
}

define protected amdgpu_kernel void @sched_barrier_m8(ptr addrspace(3) noalias %ind, ptr addrspace(3) noalias %outd) #0 {
;
; Set mask to 8 (MFMA Bit)
;
; GCN: Applying IGroupLPDAGMutation...
; GCN-NEXT: Building SchedGroup for SchedBarrier with Mask: 8
; GCN-NEXT: After Inverting, SchedGroup Mask: 2038
entry:
%arrayidx = getelementptr inbounds float, ptr addrspace(3) %ind, i64 0
%0 = load float, ptr addrspace(3) %arrayidx, align 4
call void @llvm.amdgcn.sched.barrier(i32 8) #1
%add = fadd contract float %0, 1.000000e+00
%arrayidx3 = getelementptr inbounds float, ptr addrspace(3) %outd, i64 0
store float %add, ptr addrspace(3) %arrayidx3, align 4
ret void
}

define protected amdgpu_kernel void @sched_barrier_m1024(ptr addrspace(3) noalias %ind, ptr addrspace(3) noalias %outd) #0 {
;
; Set mask to 1024 (TRANS Bit)
;
; GCN: Applying IGroupLPDAGMutation...
; GCN-NEXT: Building SchedGroup for SchedBarrier with Mask: 1024
; GCN-NEXT: After Inverting, SchedGroup Mask: 1022
entry:
%arrayidx = getelementptr inbounds float, ptr addrspace(3) %ind, i64 0
%0 = load float, ptr addrspace(3) %arrayidx, align 4
call void @llvm.amdgcn.sched.barrier(i32 1024) #1
%add = fadd contract float %0, 1.000000e+00
%arrayidx3 = getelementptr inbounds float, ptr addrspace(3) %outd, i64 0
store float %add, ptr addrspace(3) %arrayidx3, align 4
ret void
}

define protected amdgpu_kernel void @sched_barrier_m3(ptr addrspace(3) noalias %ind, ptr addrspace(3) noalias %outd) #0 {
;
; Set mask to 3 (ALU + VALU Bits)
;
; GCN: Applying IGroupLPDAGMutation...
; GCN-NEXT: Building SchedGroup for SchedBarrier with Mask: 3
; GCN-NEXT: After Inverting, SchedGroup Mask: 2044
entry:
%arrayidx = getelementptr inbounds float, ptr addrspace(3) %ind, i64 0
%0 = load float, ptr addrspace(3) %arrayidx, align 4
call void @llvm.amdgcn.sched.barrier(i32 3) #1
%add = fadd contract float %0, 1.000000e+00
%arrayidx3 = getelementptr inbounds float, ptr addrspace(3) %outd, i64 0
store float %add, ptr addrspace(3) %arrayidx3, align 4
ret void
}

define protected amdgpu_kernel void @sched_barrier_m5(ptr addrspace(3) noalias %ind, ptr addrspace(3) noalias %outd) #0 {
;
; Set mask to 5 (ALU + SALU Bits)
;
; GCN: Applying IGroupLPDAGMutation...
; GCN-NEXT: Building SchedGroup for SchedBarrier with Mask: 5
; GCN-NEXT: After Inverting, SchedGroup Mask: 2042
entry:
%arrayidx = getelementptr inbounds float, ptr addrspace(3) %ind, i64 0
%0 = load float, ptr addrspace(3) %arrayidx, align 4
call void @llvm.amdgcn.sched.barrier(i32 5) #1
%add = fadd contract float %0, 1.000000e+00
%arrayidx3 = getelementptr inbounds float, ptr addrspace(3) %outd, i64 0
store float %add, ptr addrspace(3) %arrayidx3, align 4
ret void
}

define protected amdgpu_kernel void @sched_barrier_m7(ptr addrspace(3) noalias %ind, ptr addrspace(3) noalias %outd) #0 {
;
; Set mask to 7 (ALU + VALU + SALU Bits)
;
; GCN: Applying IGroupLPDAGMutation...
; GCN-NEXT: Building SchedGroup for SchedBarrier with Mask: 7
; GCN-NEXT: After Inverting, SchedGroup Mask: 2040
entry:
%arrayidx = getelementptr inbounds float, ptr addrspace(3) %ind, i64 0
%0 = load float, ptr addrspace(3) %arrayidx, align 4
call void @llvm.amdgcn.sched.barrier(i32 7) #1
%add = fadd contract float %0, 1.000000e+00
%arrayidx3 = getelementptr inbounds float, ptr addrspace(3) %outd, i64 0
store float %add, ptr addrspace(3) %arrayidx3, align 4
ret void
}

define protected amdgpu_kernel void @sched_barrier_m15(ptr addrspace(3) noalias %ind, ptr addrspace(3) noalias %outd) #0 {
;
; Set mask to 15 (ALU + VALU + SALU + MFMA Bits)
;
; GCN: Applying IGroupLPDAGMutation...
; GCN-NEXT: Building SchedGroup for SchedBarrier with Mask: 15
; GCN-NEXT: After Inverting, SchedGroup Mask: 2032
entry:
%arrayidx = getelementptr inbounds float, ptr addrspace(3) %ind, i64 0
%0 = load float, ptr addrspace(3) %arrayidx, align 4
call void @llvm.amdgcn.sched.barrier(i32 15) #1
%add = fadd contract float %0, 1.000000e+00
%arrayidx3 = getelementptr inbounds float, ptr addrspace(3) %outd, i64 0
store float %add, ptr addrspace(3) %arrayidx3, align 4
ret void
}

declare void @llvm.amdgcn.sched.barrier(i32) #1

attributes #0 = { nounwind }
attributes #1 = { convergent nounwind }
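For readers who want to sanity-check the FileCheck values above by hand, the expected "After Inverting" numbers follow from the documented bit assignments (all instruction-type bits together are 0x7FF). Two worked examples as a small C++ sketch, not part of the test:

// Mask 15 sets ALU|VALU|SALU|MFMA, so the plain inversion 0x7F0 == 2032 is
// already the final blocked group (sched_barrier_m15).
static_assert((~15u & 0x7FFu) == 2032u, "sched_barrier_m15");
// Mask 1024 sets only TRANS; since that ALU subtype may cross, the generic
// ALU bit is also dropped from the blocked group: 1023 & ~1 == 1022
// (sched_barrier_m1024).
static_assert(((~1024u & 0x7FFu) & ~1u) == 1022u, "sched_barrier_m1024");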