Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1238,6 +1238,19 @@ def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst",
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//

// Ugly hack to accommodate assembling modules with mixed
// wavesizes. Ideally we would have a mapping symbol in assembly which
// would keep track of which sections of code should be treated as
// wave32 and wave64. Instead, what users do is assemble with both
// wavesizes enabled. We translate that request into this special mode
// so that it only influences assembler behavior and nothing else.
def FeatureAssemblerPermissiveWavesize : SubtargetFeature<
"assembler-permissive-wavesize",
"AssemblerPermissiveWavesize",
"true",
"allow parsing wave32 and wave64 variants of instructions"
>;

class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
"max-private-element-size-"#size,
"MaxPrivateElementSize",
Expand Down
33 changes: 21 additions & 12 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1246,6 +1246,12 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
// AsmParser
//===----------------------------------------------------------------------===//

// TODO: define GET_SUBTARGET_FEATURE_NAME
#define GET_REGISTER_MATCHER
#include "AMDGPUGenAsmMatcher.inc"
#undef GET_REGISTER_MATCHER
#undef GET_SUBTARGET_FEATURE_NAME

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
Expand Down Expand Up @@ -1536,6 +1542,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
return AMDGPU::isGFX10_BEncoding(getSTI());
}

bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }

bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }

bool hasInv2PiInlineImm() const {
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}
Expand Down Expand Up @@ -1603,6 +1613,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
return &MII;
}

// Returns the raw feature bits of the current subtarget info.
//
// FIXME: This should not be used. Callers should instead rely on queries
// derived from getAvailableFeatures().
const FeatureBitset &getFeatureBits() const {
  const MCSubtargetInfo &STI = getSTI();
  return STI.getFeatureBits();
}
Expand Down Expand Up @@ -2259,9 +2271,8 @@ bool AMDGPUOperand::isSDWAInt32Operand() const {
}

// Returns true when this operand is a register that satisfies isSCSrc_b64()
// while the parser reports wave64, or isSCSrc_b32() while it reports wave32.
//
// The wave size is queried through the parser's isWave64()/isWave32() helpers
// (backed by getAvailableFeatures()) instead of the raw subtarget feature
// bits. The earlier feature-bit based return has been dropped; it shadowed
// this one and left it unreachable.
bool AMDGPUOperand::isBoolReg() const {
  return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
                     (AsmParser->isWave32() && isSCSrc_b32()));
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
Expand Down Expand Up @@ -5025,9 +5036,8 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,

// Check if VCC register matches wavefront size.
//
// AMDGPU::VCC is accepted when isWave64() holds and AMDGPU::VCC_LO when
// isWave32() holds; every other register is rejected. The wave size comes
// from the parser's isWave64()/isWave32() helpers rather than the raw
// subtarget feature bits; the stale feature-bit based return that used to
// precede this one (making it unreachable) has been removed.
bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
  return (Reg == AMDGPU::VCC && isWave64()) ||
         (Reg == AMDGPU::VCC_LO && isWave32());
}

// One unique literal can be used. VOP3 literal is only allowed in GFX10+
Expand Down Expand Up @@ -5717,7 +5727,7 @@ bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
// Check if this instruction may be used with a different wavesize.
if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {

// FIXME: Use getAvailableFeatures, and do not manually recompute
FeatureBitset FeaturesWS32 = getFeatureBits();
FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
.flip(AMDGPU::FeatureWavefrontSize32);
Expand Down Expand Up @@ -6472,10 +6482,10 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
if (!isGFX10Plus())
return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
if (!isWave32())
return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
} else {
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
if (!isWave64())
return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
}
}
Expand All @@ -6484,10 +6494,10 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
if (C.wavefront_size == 5) {
if (!isGFX10Plus())
return TokError("wavefront_size=5 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
if (!isWave32())
return TokError("wavefront_size=5 requires +WavefrontSize32");
} else if (C.wavefront_size == 6) {
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
if (!isWave64())
return TokError("wavefront_size=6 requires +WavefrontSize64");
}
}
Expand Down Expand Up @@ -10390,7 +10400,6 @@ LLVMInitializeAMDGPUAsmParser() {
RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool EnableDS128 = false;
bool EnablePRTStrictNull = false;
bool DumpCode = false;
bool AssemblerPermissiveWavesize = false;

// Subtarget properties statically set by tablegen
bool FP64 = false;
Expand Down
20 changes: 18 additions & 2 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,20 +82,36 @@ createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *STI =
createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);

bool IsWave64 = STI->hasFeature(AMDGPU::FeatureWavefrontSize64);
bool IsWave32 = STI->hasFeature(AMDGPU::FeatureWavefrontSize32);

// FIXME: We should error for the default target.
if (STI->getFeatureBits().none())
STI->ToggleFeature(AMDGPU::FeatureSouthernIslands);

if (!STI->hasFeature(AMDGPU::FeatureWavefrontSize64) &&
!STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) {
if (!IsWave64 && !IsWave32) {
// If there is no default wave size it must be a generation before gfx10,
// these have FeatureWavefrontSize64 in their definition already. For gfx10+
// set wave32 as a default.
STI->ToggleFeature(AMDGPU::isGFX10Plus(*STI)
? AMDGPU::FeatureWavefrontSize32
: AMDGPU::FeatureWavefrontSize64);
} else if (IsWave64 && IsWave32) {
// The wave size is mutually exclusive. If both somehow end up set, wave32
// wins if supported.
STI->ToggleFeature(AMDGPU::supportsWave32(*STI)
? AMDGPU::FeatureWavefrontSize64
: AMDGPU::FeatureWavefrontSize32);

// If both wavesizes were manually requested, hack in a feature to permit
// assembling modules with mixed wavesizes.
STI->ToggleFeature(AMDGPU::FeatureAssemblerPermissiveWavesize);
}

assert((STI->hasFeature(AMDGPU::FeatureWavefrontSize64) !=
STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) &&
"wavesize features are mutually exclusive");

return STI;
}

Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
//===----------------------------------------------------------------------===//

// Codegen predicate: Subtarget->isWave32(). For the assembler, the predicate
// holds when either FeatureWavefrontSize32 or
// FeatureAssemblerPermissiveWavesize is set. The stale all_of-only
// AssemblerPredicate line has been removed so the def has a single
// assembler predicate.
def isWave32 : Predicate<"Subtarget->isWave32()">,
  AssemblerPredicate <(any_of FeatureWavefrontSize32,
                       FeatureAssemblerPermissiveWavesize)>;
// Codegen predicate: Subtarget->isWave64(). For the assembler, the predicate
// holds when either FeatureWavefrontSize64 or
// FeatureAssemblerPermissiveWavesize is set. The stale all_of-only
// AssemblerPredicate line has been removed so the def has a single
// assembler predicate.
def isWave64 : Predicate<"Subtarget->isWave64()">,
  AssemblerPredicate <(any_of FeatureWavefrontSize64,
                       FeatureAssemblerPermissiveWavesize)>;

class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">
: MnemonicAlias<From, To, VariantName>, PredicateControl;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1568,6 +1568,11 @@ bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);

/// Returns true when \p STI is GFX10 or later and is not GFX1250.
inline bool supportsWave32(const MCSubtargetInfo &STI) {
  // Anything before GFX10 is rejected up front.
  if (!AMDGPU::isGFX10Plus(STI))
    return false;
  return !AMDGPU::isGFX1250(STI);
}

int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
unsigned hasKernargPreload(const MCSubtargetInfo &STI);
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);
Expand Down
23 changes: 23 additions & 0 deletions llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -o - %s | FileCheck -check-prefix=GFX1250 %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32 -o - %s | FileCheck -check-prefix=GFX900 %s

// Make sure setting both modes is supported at the same time.
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 %s | FileCheck -check-prefixes=GFX10 %s

// Test that there is no assertion when using an explicit
// wavefrontsize attribute on a target which does not support it.

// GFX1250: v_add_f64_e32 v[0:1], 1.0, v[0:1]
// GFX900: v_add_f64 v[0:1], 1.0, v[0:1]
// GFX10: v_add_f64 v[0:1], 1.0, v[0:1]
v_add_f64 v[0:1], 1.0, v[0:1]

// GFX1250: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
// GFX900: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
// GFX10: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
v_cmp_eq_u32_e64 s[0:1], 1.0, s1

// GFX1250: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
// GFX900: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
// GFX10: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
v_cndmask_b32 v1, v2, v3, s[0:1]
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W32 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W64 %s

# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W32 %s

# W32: v_cmp_class_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x10,0x7d]
# W64: v_cmp_class_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x10,0x7d]
Expand Down
13 changes: 13 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx1250_wave64_feature.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -disassemble -o - %s | FileCheck %s

# Make sure there's no assertion when trying to use an unsupported
# wave64 on a wave32-only target

# CHECK: v_add_f64_e32 v[0:1], 1.0, v[0:1]
0xf2,0x00,0x00,0x04

# CHECK: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
0x00,0x00,0x4a,0xd4,0xf2,0x02,0x00,0x00

# CHECK: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
0x01,0x00,0x01,0xd5,0x02,0x07,0x02,0x00
13 changes: 13 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx9_wave32_feature.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32 -disassemble -o - %s | FileCheck %s

# Make sure there's no assertion when trying to use an unsupported
# wave32 on a wave64-only target

# CHECK: v_add_f64 v[0:1], 1.0, v[0:1]
0x00,0x00,0x80,0xd2,0xf2,0x00,0x02,0x00

# CHECK: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
0x00,0x00,0xca,0xd0,0xf2,0x02,0x00,0x00

# CHECK: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
0x01,0x00,0x00,0xd1,0x02,0x07,0x02,0x00