Skip to content

Commit a06a068

Browse files
amd-eochoalo and keshavvinayak01
authored and committed
Add ChipDetails definition for MI350X and MI355X target. (iree-org#21690)
Signed-off-by: Erick Ochoa <[email protected]> Signed-off-by: keshavvinayak01 <[email protected]>
1 parent 21023e5 commit a06a068

File tree

4 files changed

+38
-2
lines changed

4 files changed

+38
-2
lines changed

compiler/plugins/target/ROCM/test/target_device_features.mlir

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,10 @@
99
//
1010
// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' \
1111
// RUN: --iree-hip-target=gfx950 %s | FileCheck %s --check-prefixes=GFX950
12-
//
12+
// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' \
13+
// RUN: --iree-hip-target=mi350x %s | FileCheck %s --check-prefixes=GFX950,MI350X
14+
// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' \
15+
// RUN: --iree-hip-target=mi355x %s | FileCheck %s --check-prefixes=GFX950,MI355X
1316
//
1417
// RUN: iree-opt --pass-pipeline='builtin.module(iree-hal-assign-target-devices{targetDevices=hip},iree-hal-transformation-pipeline{serialize-executables=false})' \
1518
// RUN: --iree-hip-target=rx7900xtx %s | FileCheck %s --check-prefix=GFX1100
@@ -58,6 +61,8 @@
5861
// GFX950-SAME: scaled_mma = [<intrinsic = MFMA_SCALE_F32_16x16x128_B32, lhs_elem_type = f8E8M0FNU, rhs_elem_type = f8E8M0FNU, acc_elem_type = f32>, <intrinsic = MFMA_SCALE_F32_16x16x128_B32, lhs_elem_type = f8E5M2, rhs_elem_type = f8E5M2, acc_elem_type = f32>, <intrinsic = MFMA_SCALE_F32_16x16x128_B32, lhs_elem_type = f8E5M2FNUZ, rhs_elem_type = f8E5M2FNUZ, acc_elem_type = f32>, <intrinsic = MFMA_SCALE_F32_16x16x128_B32, lhs_elem_type = f8E4M3FN, rhs_elem_type = f8E4M3FN, acc_elem_type = f32>, <intrinsic = MFMA_SCALE_F32_16x16x128_B32, lhs_elem_type = f8E4M3FNUZ, rhs_elem_type = f8E4M3FNUZ, acc_elem_type = f32>, <intrinsic = MFMA_SCALE_F32_16x16x128_B32, lhs_elem_type = f4E2M1FN, rhs_elem_type = f4E2M1FN, acc_elem_type = f32>, <intrinsic = MFMA_SCALE_F32_32x32x64_B32, lhs_elem_type = f8E8M0FNU, rhs_elem_type = f8E8M0FNU, acc_elem_type = f32>, <intrinsic = MFMA_SCALE_F32_32x32x64_B32, lhs_elem_type = f8E5M2, rhs_elem_type = f8E5M2, acc_elem_type = f32>, <intrinsic = MFMA_SCALE_F32_32x32x64_B32, lhs_elem_type = f8E5M2FNUZ, rhs_elem_type = f8E5M2FNUZ, acc_elem_type = f32>, <intrinsic = MFMA_SCALE_F32_32x32x64_B32, lhs_elem_type = f8E4M3FN, rhs_elem_type = f8E4M3FN, acc_elem_type = f32>, <intrinsic = MFMA_SCALE_F32_32x32x64_B32, lhs_elem_type = f8E4M3FNUZ, rhs_elem_type = f8E4M3FNUZ, acc_elem_type = f32>, <intrinsic = MFMA_SCALE_F32_32x32x64_B32, lhs_elem_type = f4E2M1FN, rhs_elem_type = f4E2M1FN, acc_elem_type = f32>],
5962
// GFX950-SAME: subgroup_size_choices = [64],
6063
// GFX950-SAME: max_workgroup_memory_bytes = 163840,
64+
// MI350X: chip = <wgp_count = 256, sku = "mi350x", memory_bandwidth_tbps = 8.000000e+00 : f32, perf_tflops = {fp16 = 2.300000e+03 : f32, fp32 = 1.442000e+02 : f32, fp4 = 9.200000e+03 : f32, fp6 = 9.200000e+03 : f32, fp8 = 4.600000e+03 : f32, int8 = 4.600000e+03 : f32}>>
65+
// MI355X: chip = <wgp_count = 256, sku = "mi355x", memory_bandwidth_tbps = 8.000000e+00 : f32, perf_tflops = {fp16 = 2.500000e+03 : f32, fp32 = 1.573000e+02 : f32, fp4 = 1.000000e+04 : f32, fp6 = 1.000000e+04 : f32, fp8 = 5.000000e+03 : f32, int8 = 5.000000e+03 : f32}>>
6166

6267
// GFX1100: target_info = #iree_gpu.target<arch = "gfx1100",
6368
// GFX1100-SAME: mma = [<WMMAR3_F32_16x16x16_F16>, <WMMAR3_F16_16x16x16_F16>, <WMMAR3_F32_16x16x16_BF16>, <WMMAR3_BF16_16x16x16_BF16>, <WMMAR3_I32_16x16x16_I8>]

compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUEnums.td

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,13 +33,18 @@ def IREEGPU_CIBW_16 : I32BitEnumAttrCaseBit<"Int16", 5, "int16">;
3333
def IREEGPU_CIBW_8 : I32BitEnumAttrCaseBit<"Int8", 6, "int8">;
3434
// Generic 8-bit floating point format in computation
3535
def IREEGPU_CFBW_8 : I32BitEnumAttrCaseBit<"FP8", 7, "fp8">;
36+
// Generic 6-bit floating point format in computation
37+
def IREEGPU_CFBW_6 : I32BitEnumAttrCaseBit<"FP6", 8, "fp6">;
38+
// Generic 4-bit floating point format in computation
39+
def IREEGPU_CFBW_4 : I32BitEnumAttrCaseBit<"FP4", 9, "fp4">;
3640

3741

3842
def IREEGPU_ComputeBitwidths : I32BitEnumAttr<
3943
"ComputeBitwidths", "Supported bitwidths for compute",
4044
[IREEGPU_CFBW_64, IREEGPU_CFBW_32, IREEGPU_CFBW_16,
4145
IREEGPU_CIBW_64, IREEGPU_CIBW_32, IREEGPU_CIBW_16,
42-
IREEGPU_CIBW_8, IREEGPU_CFBW_8]> {
46+
IREEGPU_CIBW_8, IREEGPU_CFBW_8, IREEGPU_CFBW_6,
47+
IREEGPU_CFBW_4]> {
4348
let cppNamespace = "::mlir::iree_compiler::IREE::GPU";
4449
let genSpecializedAttr = 0;
4550
}

compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.cpp

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -452,6 +452,27 @@ std::optional<TargetDetails> getAMDGPUTargetDetails(StringRef target) {
452452
const WgpDetails *rdna1Wgp = getRDNA1WgpDetails();
453453

454454
// --- CDNA --- //
455+
// "AMD Instinct MI350 Series Product Offerings" in Page 18 of
456+
// https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-4-architecture-whitepaper.pdf
457+
static const ChipDetails mi350xChip = {256,
458+
"mi350x",
459+
8.0f,
460+
{{ComputeBitwidths::FP32, 144.2f},
461+
{ComputeBitwidths::FP16, 2300.0f},
462+
{ComputeBitwidths::Int8, 4600.0f},
463+
{ComputeBitwidths::FP8, 4600.0f},
464+
{ComputeBitwidths::FP6, 9200.0f},
465+
{ComputeBitwidths::FP4, 9200.0f}}};
466+
467+
static const ChipDetails mi355xChip = {256,
468+
"mi355x",
469+
8.0f,
470+
{{ComputeBitwidths::FP32, 157.3f},
471+
{ComputeBitwidths::FP16, 2500.0f},
472+
{ComputeBitwidths::Int8, 5000.0f},
473+
{ComputeBitwidths::FP8, 5000.0f},
474+
{ComputeBitwidths::FP6, 10000.0f},
475+
{ComputeBitwidths::FP4, 10000.0f}}};
455476

456477
// "AMD Instinct MI300 Series Product Offerings" in Page 23 of
457478
// https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf
@@ -580,6 +601,8 @@ std::optional<TargetDetails> getAMDGPUTargetDetails(StringRef target) {
580601
// See https://llvm.org/docs/AMDGPUUsage.html#processors for gfxN to
581602
// cdnaN/rdnaN mapping.
582603
return llvm::StringSwitch<std::optional<TargetDetails>>(target.lower())
604+
.Case("mi355x", TargetDetails{cdna4Wgp, &mi355xChip})
605+
.Case("mi350x", TargetDetails{cdna4Wgp, &mi350xChip})
583606
.Cases("cdna4", "gfx950", TargetDetails{cdna4Wgp, nullptr})
584607
.Case("mi325x", TargetDetails{cdna3Wgp, &mi325xChip})
585608
.Case("mi300x", TargetDetails{cdna3Wgp, &mi300xChip})
@@ -636,6 +659,7 @@ StringRef normalizeAMDGPUTarget(StringRef target) {
636659
return target;
637660

638661
return llvm::StringSwitch<StringRef>(target.lower())
662+
.Cases("mi350x", "mi355x", "gfx950")
639663
.Cases("mi300a", "mi300x", "mi308x", "mi325x", "gfx942")
640664
.Cases("mi250x", "mi250", "mi210", "cdna2", "gfx90a")
641665
.Cases("mi100", "cdna1", "gfx908")

docs/website/docs/guides/deployment-configurations/gpu-rocm.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -193,6 +193,8 @@ architectures:
193193
| AMD MI300X | `mi300x` | `gfx942` | `cdna3` |
194194
| AMD MI308X | `mi308x` | `gfx942` | `cdna3` |
195195
| AMD MI325X | `mi325x` | `gfx942` | `cdna3` |
196+
| AMD MI350X | `mi350x` | `gfx950` | `cdna4` |
197+
| AMD MI355X | `mi355x` | `gfx950` | `cdna4` |
196198
| AMD PRO V710 | `v710` | `gfx1101` | `rdna3` |
197199
| AMD PRO W7700 | `w7700` | `gfx1101` | `rdna3` |
198200
| AMD PRO W7800 | `w7800` | `gfx1100` | `rdna3` |

0 commit comments

Comments
 (0)