Skip to content

Commit aba0fbf

Browse files
authored
[release/3.2.x] [CHERRY PICK] Add gfx950 target definition (#5452)
This PR brings in the required LLVM bumps and additional targets for gfx950 support:
- #5040
- #5064
- #5180
- #5242
- #5392

Reverts:
- #5347
- #5191
1 parent e74f027 commit aba0fbf

File tree

6 files changed

+10
-9
lines changed

6 files changed

+10
-9
lines changed

cmake/llvm-hash.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
b5cc222d7429fe6f18c787f633d5262fac2e676f
1+
86b69c31642e98f8357df62c09d118ad1da4e16a

lib/Conversion/TritonToTritonGPU/TritonGPUConversion.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,20 +56,19 @@ TritonGPUTypeConverter::TritonGPUTypeConverter(MLIRContext *context,
5656
// This will create newArg, and map(origArg, newArg)
5757
addArgumentMaterialization([&](OpBuilder &builder,
5858
RankedTensorType tensorType, ValueRange inputs,
59-
Location loc) -> std::optional<Value> {
59+
Location loc) -> Value {
6060
llvm_unreachable("Argument rematerialization should not happen in Triton "
6161
"-> TritonGPU conversion");
62-
return std::nullopt;
62+
return {};
6363
});
6464

6565
// If the origValue still has live user(s), use this to
6666
// convert origValue to newValue
6767
addSourceMaterialization([&](OpBuilder &builder, RankedTensorType tensorType,
68-
ValueRange inputs,
69-
Location loc) -> std::optional<Value> {
68+
ValueRange inputs, Location loc) -> Value {
7069
llvm_unreachable("Source rematerialization should not happen in Triton -> "
7170
"TritonGPU Conversion");
72-
return std::nullopt;
71+
return {};
7372
});
7473

7574
// This will be called when (desiredType != newOperandType)
@@ -79,7 +78,7 @@ TritonGPUTypeConverter::TritonGPUTypeConverter(MLIRContext *context,
7978
ValueRange inputs, Location loc) {
8079
auto cast =
8180
builder.create<triton::gpu::ConvertLayoutOp>(loc, tensorType, inputs);
82-
return std::optional<Value>(cast.getResult());
81+
return cast.getResult();
8382
});
8483
}
8584

test/TritonGPU/amd/amd-convert-buffer-ops.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ module attributes {"triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 4 :
4242
%1 = arith.muli %0, %c1024_i32 : i32
4343
%sub = arith.subi %1, %c128_i32 : i32
4444
%cmp = arith.cmpi sgt, %sub, %c0_i32 : i32
45-
"llvm.intr.assume"(%cmp) : (i1) -> ()
45+
llvm.intr.assume %cmp : i1
4646
%2 = tt.splat %sub : i32 -> tensor<1024xi32, #blocked>
4747
%3 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32, #blocked>
4848
// CHECK: %[[offset:.*]] = arith.addi

test/lib/Instrumentation/GPUHello.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ bool GpuHello::runOnModule(Module &module) {
6161

6262
PassPluginLibraryInfo getPassPluginInfo() {
6363
const auto callback = [](PassBuilder &pb) {
64-
pb.registerOptimizerLastEPCallback([&](ModulePassManager &mpm, auto) {
64+
pb.registerOptimizerLastEPCallback([&](ModulePassManager &mpm, auto, auto) {
6565
mpm.addPass(GpuHello());
6666
return true;
6767
});

third_party/amd/backend/include/hsa/amd_hsa_elf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ enum : unsigned {
130130
EF_AMDGPU_MACH_AMDGCN_GFX1151 = 0x04a,
131131
EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b,
132132
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
133+
EF_AMDGPU_MACH_AMDGCN_GFX950 = 0x04f,
133134

134135
// First/last AMDGCN-based processors.
135136
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,

third_party/amd/lib/TritonAMDGPUToLLVM/TargetUtils.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ ISAFamily deduceISAFamily(llvm::StringRef arch) {
1111

1212
// CDNA ISA cases
1313
switch (kind) {
14+
case llvm::AMDGPU::GK_GFX950:
1415
case llvm::AMDGPU::GK_GFX942:
1516
case llvm::AMDGPU::GK_GFX941:
1617
case llvm::AMDGPU::GK_GFX940:

0 commit comments

Comments
 (0)