Skip to content

Commit a24521a

Browse files
committed
Guess the constraints instead of using user-provided hints
1 parent a3f52ee commit a24521a

File tree

2 files changed

+53
-7
lines changed

2 files changed

+53
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2392,13 +2392,56 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
23922392
Result = false;
23932393

23942394
else if (MI.isInlineAsm()) {
2395+
auto &TRI = TII->getRegisterInfo();
2396+
auto &MRI = MI.getParent()->getParent()->getRegInfo();
2397+
bool SGPR_used = false, VGPR_used = false, VMFMA_used = false,
2398+
MayLoad = MI.mayLoad(), MayStore = MI.mayStore();
2399+
for (const MachineOperand &Operand : MI.operands())
2400+
if (Operand.isReg()) {
2401+
auto &RegClass = *TRI.getRegClassForOperandReg(MRI, Operand);
2402+
if (TRI.isVGPRClass(&RegClass))
2403+
VGPR_used = true;
2404+
if (TRI.isAGPRClass(&RegClass) || TRI.getRegSizeInBits(RegClass) > 128)
2405+
VMFMA_used = true;
2406+
if (TRI.isSGPRClass(&RegClass))
2407+
SGPR_used = true;
2408+
}
2409+
2410+
unsigned long InlineAsmMask = 0;
2411+
if (VGPR_used && !VMFMA_used && !MayLoad && !MayStore)
2412+
InlineAsmMask |= (unsigned long)SchedGroupMask::VALU;
2413+
if (VMFMA_used)
2414+
InlineAsmMask |= (unsigned long)SchedGroupMask::MFMA;
2415+
if (VGPR_used && MayLoad)
2416+
InlineAsmMask |= (unsigned long)SchedGroupMask::VMEM_READ;
2417+
if (VGPR_used && MayStore)
2418+
InlineAsmMask |= (unsigned long)SchedGroupMask::VMEM_WRITE;
2419+
if (!VGPR_used && MayLoad)
2420+
InlineAsmMask |= (unsigned long)SchedGroupMask::DS_READ;
2421+
if (!VGPR_used && MayStore)
2422+
InlineAsmMask |= (unsigned long)SchedGroupMask::DS_WRITE;
2423+
if (InlineAsmMask & (unsigned long)SchedGroupMask::VALU ||
2424+
InlineAsmMask & (unsigned long)SchedGroupMask::SALU)
2425+
InlineAsmMask |= (unsigned long)SchedGroupMask::ALU;
2426+
if (InlineAsmMask & (unsigned long)SchedGroupMask::DS_READ ||
2427+
InlineAsmMask & (unsigned long)SchedGroupMask::DS_WRITE)
2428+
InlineAsmMask |= (unsigned long)SchedGroupMask::DS;
2429+
if (InlineAsmMask & (unsigned long)SchedGroupMask::VMEM_READ ||
2430+
InlineAsmMask & (unsigned long)SchedGroupMask::VMEM_WRITE)
2431+
InlineAsmMask |= (unsigned long)SchedGroupMask::VMEM;
2432+
2433+
Result = ((unsigned long)SGMask & InlineAsmMask) != 0;
2434+
2435+
// Original implementation
2436+
#if 0
23952437
StringRef Text = MI.getOperand(0).getSymbolName();
23962438
if (Text.find("SGMASK:") != std::string::npos) {
23972439
Text = Text.substr(Text.find("SGMASK:") + strlen("SGMASK:"));
23982440
Text = Text.substr(0, Text.find_first_of(" \t\r\n"));
23992441
unsigned long InlineAsmMask = std::stoul(Text.str(), nullptr, 0);
24002442
Result = ((unsigned long)SGMask & InlineAsmMask) != 0;
24012443
}
2444+
#endif
24022445
}
24032446

24042447
else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) &&

llvm/test/CodeGen/AMDGPU/inlineasm-sgmask.ll

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -28,20 +28,23 @@ if.then: ; preds = %entry
2828
%mul3 = shl nsw i32 %add, 2
2929
%idx.ext4 = sext i32 %mul3 to i64
3030
%add.ptr5 = getelementptr inbounds float, ptr addrspace(1) %from.coerce, i64 %idx.ext4
31-
%2 = load <4 x float>, ptr addrspace(1) %add.ptr5, align 16, !tbaa !0
32-
%3 = extractelement <4 x float> %2, i64 3
33-
%4 = extractelement <4 x float> %2, i64 0
34-
%5 = tail call contract noundef float asm "v_add_f32_e32 $0, $1, $2 ; SGMASK:0x1", "=v,v,v"(float %3, float %4) #3, !srcloc !3
35-
%6 = extractelement <4 x float> %2, i64 1
36-
%7 = extractelement <4 x float> %2, i64 2
31+
%2 = load <2 x float>, ptr addrspace(1) %add.ptr5, align 16, !tbaa !0
32+
%a20 = add i64 %idx.ext4, 2
33+
%a21 = getelementptr inbounds float, ptr addrspace(1) %from.coerce, i64 %a20
34+
%a22 = load <2 x float>, ptr addrspace(1) %a21, align 16, !tbaa !0
35+
%3 = extractelement <2 x float> %a22, i64 1
36+
%4 = extractelement <2 x float> %2, i64 0
37+
%5 = tail call contract noundef float asm "v_mfma_f64_4x4x4f64 $0, $1, $2, 0", "=a,v,v"(<2 x float> %2, <2 x float> %a22) #3, !srcloc !3
38+
%6 = extractelement <2 x float> %2, i64 1
39+
%7 = extractelement <2 x float> %a22, i64 0
3740
%add6 = fadd contract float %6, %7
3841
%add7 = fadd contract float %5, %add6
3942
store float %add7, ptr addrspace(1) %add.ptr, align 4, !tbaa !4
4043
tail call void @llvm.amdgcn.sched.group.barrier(i32 16, i32 1, i32 0)
4144
tail call void @llvm.amdgcn.sched.group.barrier(i32 2, i32 5, i32 0)
4245
tail call void @llvm.amdgcn.sched.group.barrier(i32 16, i32 1, i32 0)
4346
tail call void @llvm.amdgcn.sched.group.barrier(i32 2, i32 1, i32 0)
44-
tail call void @llvm.amdgcn.sched.group.barrier(i32 1, i32 1, i32 0)
47+
tail call void @llvm.amdgcn.sched.group.barrier(i32 8, i32 1, i32 0)
4548
tail call void @llvm.amdgcn.sched.group.barrier(i32 2, i32 1, i32 0)
4649
br label %if.end
4750

0 commit comments

Comments
 (0)