Skip to content

Commit 95be6c5

Browse files
committed
Merge branch 'users/arsenm/gfx1250-wave64-error' into HEAD
2 parents 5b4d86d + b73291f commit 95be6c5

File tree

10 files changed

+112
-17
lines changed

10 files changed

+112
-17
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,6 +1238,19 @@ def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst",
12381238
// Subtarget Features (options and debugging)
12391239
//===------------------------------------------------------------===//
12401240

1241+
// Ugly hack to accomodate an assembling modules with mixed
1242+
// wavesizes. Ideally we would have a mapping symbol in assembly which
1243+
// would keep track of which sections of code should be treated as
1244+
// wave32 and wave64. Instead what users do is assemble with both
1245+
// wavesizes enabled. We translate this into this special mode so this
1246+
// only influences assembler behavior and nothing else.
1247+
def FeatureAssemblerPermissiveWavesize : SubtargetFeature<
1248+
"assembler-permissive-wavesize",
1249+
"AssemblerPermissiveWavesize",
1250+
"true",
1251+
"allow parsing wave32 and wave64 variants of instructions"
1252+
>;
1253+
12411254
class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
12421255
"max-private-element-size-"#size,
12431256
"MaxPrivateElementSize",

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,6 +1247,12 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
12471247
// AsmParser
12481248
//===----------------------------------------------------------------------===//
12491249

1250+
// TODO: define GET_SUBTARGET_FEATURE_NAME
1251+
#define GET_REGISTER_MATCHER
1252+
#include "AMDGPUGenAsmMatcher.inc"
1253+
#undef GET_REGISTER_MATCHER
1254+
#undef GET_SUBTARGET_FEATURE_NAME
1255+
12501256
// Holds info related to the current kernel, e.g. count of SGPRs used.
12511257
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
12521258
// .amdgpu_hsa_kernel or at EOF.
@@ -1545,6 +1551,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
15451551
return AMDGPU::isGFX10_BEncoding(getSTI());
15461552
}
15471553

1554+
bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1555+
1556+
bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1557+
15481558
bool hasInv2PiInlineImm() const {
15491559
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
15501560
}
@@ -1608,6 +1618,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
16081618
return &MII;
16091619
}
16101620

1621+
// FIXME: This should not be used. Instead, should use queries derived from
1622+
// getAvailableFeatures().
16111623
const FeatureBitset &getFeatureBits() const {
16121624
return getSTI().getFeatureBits();
16131625
}
@@ -2264,9 +2276,8 @@ bool AMDGPUOperand::isSDWAInt32Operand() const {
22642276
}
22652277

22662278
bool AMDGPUOperand::isBoolReg() const {
2267-
auto FB = AsmParser->getFeatureBits();
2268-
return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2269-
(FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2279+
return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2280+
(AsmParser->isWave32() && isSCSrc_b32()));
22702281
}
22712282

22722283
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
@@ -4984,9 +4995,8 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
49844995

49854996
// Check if VCC register matches wavefront size
49864997
bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
4987-
auto FB = getFeatureBits();
4988-
return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4989-
(FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4998+
return (Reg == AMDGPU::VCC && isWave64()) ||
4999+
(Reg == AMDGPU::VCC_LO && isWave32());
49905000
}
49915001

49925002
// One unique literal can be used. VOP3 literal is only allowed in GFX10+
@@ -5671,7 +5681,7 @@ bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
56715681
// Check if this instruction may be used with a different wavesize.
56725682
if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
56735683
!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5674-
5684+
// FIXME: Use getAvailableFeatures, and do not manually recompute
56755685
FeatureBitset FeaturesWS32 = getFeatureBits();
56765686
FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
56775687
.flip(AMDGPU::FeatureWavefrontSize32);
@@ -6426,10 +6436,10 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
64266436
if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
64276437
if (!isGFX10Plus())
64286438
return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6429-
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6439+
if (!isWave32())
64306440
return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
64316441
} else {
6432-
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6442+
if (!isWave64())
64336443
return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
64346444
}
64356445
}
@@ -6438,10 +6448,10 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
64386448
if (C.wavefront_size == 5) {
64396449
if (!isGFX10Plus())
64406450
return TokError("wavefront_size=5 is only allowed on GFX10+");
6441-
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6451+
if (!isWave32())
64426452
return TokError("wavefront_size=5 requires +WavefrontSize32");
64436453
} else if (C.wavefront_size == 6) {
6444-
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6454+
if (!isWave64())
64456455
return TokError("wavefront_size=6 requires +WavefrontSize64");
64466456
}
64476457
}
@@ -10344,7 +10354,6 @@ LLVMInitializeAMDGPUAsmParser() {
1034410354
RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
1034510355
}
1034610356

10347-
#define GET_REGISTER_MATCHER
1034810357
#define GET_MATCHER_IMPLEMENTATION
1034910358
#define GET_MNEMONIC_SPELL_CHECKER
1035010359
#define GET_MNEMONIC_CHECKER

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
9999
bool EnableDS128 = false;
100100
bool EnablePRTStrictNull = false;
101101
bool DumpCode = false;
102+
bool AssemblerPermissiveWavesize = false;
102103

103104
// Subtarget statically properties set by tablegen
104105
bool FP64 = false;

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,20 +82,36 @@ createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
8282
MCSubtargetInfo *STI =
8383
createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
8484

85+
bool IsWave64 = STI->hasFeature(AMDGPU::FeatureWavefrontSize64);
86+
bool IsWave32 = STI->hasFeature(AMDGPU::FeatureWavefrontSize32);
87+
8588
// FIXME: We should error for the default target.
8689
if (STI->getFeatureBits().none())
8790
STI->ToggleFeature(AMDGPU::FeatureSouthernIslands);
8891

89-
if (!STI->hasFeature(AMDGPU::FeatureWavefrontSize64) &&
90-
!STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) {
92+
if (!IsWave64 && !IsWave32) {
9193
// If there is no default wave size it must be a generation before gfx10,
9294
// these have FeatureWavefrontSize64 in their definition already. For gfx10+
9395
// set wave32 as a default.
9496
STI->ToggleFeature(AMDGPU::isGFX10Plus(*STI)
9597
? AMDGPU::FeatureWavefrontSize32
9698
: AMDGPU::FeatureWavefrontSize64);
99+
} else if (IsWave64 && IsWave32) {
100+
// The wave size is mutually exclusive. If both somehow end up set, wave64
101+
// wins if supported.
102+
STI->ToggleFeature(AMDGPU::supportsWave32(*STI)
103+
? AMDGPU::FeatureWavefrontSize64
104+
: AMDGPU::FeatureWavefrontSize32);
105+
106+
// If both wavesizes were manually requested, hack in a feature to permit
107+
// assembling modules with mixed wavesizes.
108+
STI->ToggleFeature(AMDGPU::FeatureAssemblerPermissiveWavesize);
97109
}
98110

111+
assert((STI->hasFeature(AMDGPU::FeatureWavefrontSize64) !=
112+
STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) &&
113+
"wavesize features are mutually exclusive");
114+
99115
return STI;
100116
}
101117

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77
//===----------------------------------------------------------------------===//
88

99
def isWave32 : Predicate<"Subtarget->isWave32()">,
10-
AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
10+
AssemblerPredicate <(any_of FeatureWavefrontSize32,
11+
FeatureAssemblerPermissiveWavesize)>;
1112
def isWave64 : Predicate<"Subtarget->isWave64()">,
12-
AssemblerPredicate <(all_of FeatureWavefrontSize64)>;
13+
AssemblerPredicate <(any_of FeatureWavefrontSize64,
14+
FeatureAssemblerPermissiveWavesize)>;
1315

1416
class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">
1517
: MnemonicAlias<From, To, VariantName>, PredicateControl;

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1568,6 +1568,11 @@ bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
15681568
bool hasMAIInsts(const MCSubtargetInfo &STI);
15691569
bool hasVOPD(const MCSubtargetInfo &STI);
15701570
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1571+
1572+
inline bool supportsWave32(const MCSubtargetInfo &STI) {
1573+
return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI);
1574+
}
1575+
15711576
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
15721577
unsigned hasKernargPreload(const MCSubtargetInfo &STI);
15731578
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -o - %s | FileCheck -check-prefix=GFX1250 %s
2+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32 -o - %s | FileCheck -check-prefix=GFX900 %s
3+
4+
// Both are supported, but not at the same time
5+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 %s | FileCheck -check-prefixes=GFX10 %s
6+
7+
// Test that there is no assertion when using an explicit
8+
// wavefrontsize attribute on a target which does not support it.
9+
10+
// GFX1250: v_add_f64_e32 v[0:1], 1.0, v[0:1]
11+
// GFX900: v_add_f64 v[0:1], 1.0, v[0:1]
12+
// GFX10: v_add_f64 v[0:1], 1.0, v[0:1]
13+
v_add_f64 v[0:1], 1.0, v[0:1]
14+
15+
// GFX1250: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
16+
// GFX900: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
17+
// GFX10: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
18+
v_cmp_eq_u32_e64 s[0:1], 1.0, s1
19+
20+
// GFX1250: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
21+
// GFX900: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
22+
// GFX10: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
23+
v_cndmask_b32 v1, v2, v3, s[0:1]

llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W32 %s
22
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W64 %s
3-
3+
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W32 %s
44

55
# W32: v_cmp_class_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x10,0x7d]
66
# W64: v_cmp_class_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x10,0x7d]
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -disassemble -o - %s | FileCheck %s
2+
3+
# Make sure there's no assertion when trying to use an unsupported
4+
# wave64 on a wave32-only target
5+
6+
# CHECK: v_add_f64_e32 v[0:1], 1.0, v[0:1]
7+
0xf2,0x00,0x00,0x04
8+
9+
# CHECK: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
10+
0x00,0x00,0x4a,0xd4,0xf2,0x02,0x00,0x00
11+
12+
# CHECK: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
13+
0x01,0x00,0x01,0xd5,0x02,0x07,0x02,0x00
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32 -disassemble -o - %s | FileCheck %s
2+
3+
# Make sure there's no assertion when trying to use an unsupported
4+
# wave32 on a wave64-only target
5+
6+
# CHECK: v_add_f64 v[0:1], 1.0, v[0:1]
7+
0x00,0x00,0x80,0xd2,0xf2,0x00,0x02,0x00
8+
9+
# CHECK: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
10+
0x00,0x00,0xca,0xd0,0xf2,0x02,0x00,0x00
11+
12+
# CHECK: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
13+
0x01,0x00,0x00,0xd1,0x02,0x07,0x02,0x00

0 commit comments

Comments
 (0)