Skip to content

Commit ec6e00b

Browse files
authored
[Codegen][Tuner] Add support for per-sku tuning spec (iree-org#19762)
This PR implements support for per-sku tuning specs, and nd then per-architecture as a fallback, which is relevant to task outlined in iree-org#19720. --------- Signed-off-by: Bangtian Liu <[email protected]>
1 parent bbe7f5c commit ec6e00b

File tree

5 files changed

+68
-25
lines changed

5 files changed

+68
-25
lines changed

compiler/plugins/target/ROCM/builtins/tuning/test/spec_gfx942.mlir

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,23 @@
44
// RUN: --iree-codegen-notify-transform-strategy-application \
55
// RUN: --verify-diagnostics %s | FileCheck %s
66

7+
// RUN: iree-opt --split-input-file --iree-gpu-test-target=mi300x@hip \
8+
// RUN: --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-configure-target-executable-variants{target=rocm})))" \
9+
// RUN: --iree-codegen-enable-default-tuning-specs \
10+
// RUN: --iree-codegen-notify-transform-strategy-application \
11+
// RUN: --verify-diagnostics %s | FileCheck %s --check-prefix=MI300X
12+
713
// Check that the default configuration for mmt_2048x1280x5120_f16_f16_f32
814
// applies to the `linalg.matmul_transpose_b` below.
915

1016
// CHECK-LABEL: func.func @mmt_2048x1280x5120_f16_f16_f32
1117
// CHECK: linalg.generic
1218
// CHECK-SAME: __tuning_spec_applied__
1319

20+
// MI300X-LABEL: func.func @mmt_2048x1280x5120_f16_f16_f32
21+
// MI300X: linalg.generic
22+
// MI300X-SAME: __tuning_spec_applied__
23+
1424
#pipeline_layout = #hal.pipeline.layout<bindings = [
1525
#hal.pipeline.binding<storage_buffer>,
1626
#hal.pipeline.binding<storage_buffer>,

compiler/plugins/target/ROCM/test/target_device_features.mlir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121
// GFX942-SAME: subgroup_size_choices = [64], max_workgroup_sizes = [1024, 1024, 1024],
2222
// GFX942-SAME: max_thread_count_per_workgroup = 1024, max_workgroup_memory_bytes = 65536,
2323
// GFX942-SAME: max_workgroup_counts = [2147483647, 2147483647, 2147483647],
24-
// MI300X: chip = <wgp_count = 304>>
25-
// MI300A: chip = <wgp_count = 228>>
26-
// MI308X: chip = <wgp_count = 80>>
24+
// MI300X: chip = <wgp_count = 304, sku = "mi300x">>
25+
// MI300A: chip = <wgp_count = 228, sku = "mi300a">>
26+
// MI308X: chip = <wgp_count = 80, sku = "mi308x">>
2727

2828
// GFX941: target = #iree_gpu.target<arch = "gfx941",
2929
// GFX941-SAME: features = "+sramecc,-xnack"

compiler/src/iree/compiler/Codegen/Common/MaterializeTuningSpecsPass.cpp

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,18 @@ getUserTuningSpec(ModuleOp module, IREE::Codegen::IREECodegenDialect &dialect) {
111111
return *maybeTransformLibrary;
112112
}
113113

114+
static std::optional<StringRef> fetchDefaultTuningSpec(StringRef identifier) {
115+
std::string tuningSpecName =
116+
llvm::formatv("iree_default_tuning_spec_{}.mlir", identifier);
117+
std::optional<StringRef> tuningSpecSource;
118+
119+
EmbeddedDataDirectory::withGlobal([&](EmbeddedDataDirectory &dir) {
120+
tuningSpecSource = dir.getFile(tuningSpecName);
121+
});
122+
123+
return tuningSpecSource;
124+
}
125+
114126
static FailureOr<ModuleOp>
115127
getDefaultTuningSpec(ModuleOp module,
116128
IREE::Codegen::IREECodegenDialect &dialect) {
@@ -123,14 +135,29 @@ getDefaultTuningSpec(ModuleOp module,
123135
return failure();
124136
}
125137

126-
// Try to look up the default tuning spec for this architecture, if any.
127-
StringRef arch = gpuTarget.getArch();
128-
std::string defaultTuningSpecName =
129-
llvm::formatv("iree_default_tuning_spec_{}.mlir", arch);
138+
std::optional<StringRef> sku;
139+
if (IREE::GPU::TargetChipAttr chip = gpuTarget.getChip()) {
140+
if (StringAttr chipSku = chip.getSku()) {
141+
sku = chipSku.getValue();
142+
}
143+
}
144+
145+
std::string defaultTuningSpecName;
130146
std::optional<StringRef> defaultTuningSpecSource;
131-
EmbeddedDataDirectory::withGlobal([&](EmbeddedDataDirectory &dir) {
132-
defaultTuningSpecSource = dir.getFile(defaultTuningSpecName);
133-
});
147+
if (sku) {
148+
// GPUs with the same ISA may have different hardware characteristics such
149+
// as the number of workgroup processors and power limits, Look up
150+
// SKU-specific tuning spec for optimal performance.
151+
defaultTuningSpecSource = fetchDefaultTuningSpec(*sku);
152+
}
153+
154+
if (!defaultTuningSpecSource) {
155+
// If SKU-specific spec is not found, fall back to the default
156+
// architecture-based tuning spec to ensure broader compatibility.
157+
StringRef arch = gpuTarget.getArch();
158+
defaultTuningSpecSource = fetchDefaultTuningSpec(arch);
159+
}
160+
134161
if (!defaultTuningSpecSource) {
135162
// Not all architectures are expected to provide default tuning specs, so
136163
// this shouldn't be considered a hard error (but that's up to the caller).

compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,8 @@ def IREEGPU_TargetChipAttr : AttrDef<IREEGPU_Dialect, "TargetChip"> {
416416
let parameters = (ins
417417
"uint32_t":$wgp_count,
418418

419+
// An optional SKU identifier to distinguish different models.
420+
OptionalParameter<"StringAttr">:$sku,
419421
// An optional extra dict
420422
// This field allows to inject more features/limits not supported in the
421423
// above list for better flexibility.

compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.cpp

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ struct WgpDetails {
5454
// Chip level feature/limit details
5555
struct ChipDetails {
5656
uint32_t wgpCount;
57+
std::optional<StringRef> sku;
5758
};
5859

5960
// Full target details
@@ -116,9 +117,13 @@ TargetAttr createTargetAttr(const TargetDetails &details, StringRef arch,
116117
DictionaryAttr{});
117118

118119
TargetChipAttr targetChip;
119-
if (details.chip)
120-
targetChip =
121-
TargetChipAttr::get(context, details.chip->wgpCount, DictionaryAttr{});
120+
if (details.chip) {
121+
auto skuAttr = details.chip->sku
122+
? StringAttr::get(context, *details.chip->sku)
123+
: StringAttr{};
124+
targetChip = TargetChipAttr::get(context, details.chip->wgpCount, skuAttr,
125+
DictionaryAttr{});
126+
}
122127

123128
return TargetAttr::get(context, arch, features, targetWgp, targetChip);
124129
}
@@ -279,28 +284,27 @@ std::optional<TargetDetails> getAMDGPUTargetDetails(StringRef target) {
279284

280285
// "AMD Instinct MI300 Series Product Offerings" in Page 23 of
281286
// https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf
282-
static const ChipDetails mi300xChip = {304};
283-
static const ChipDetails mi300aChip = {228};
284-
static const ChipDetails mi308xChip = {80};
287+
static const ChipDetails mi300xChip = {304, "mi300x"};
288+
static const ChipDetails mi300aChip = {228, "mi300a"};
289+
static const ChipDetails mi308xChip = {80, "mi308x"};
285290

286291
// "AMD Instinct MI200 Series Accelerator Product Offerings" in Page 14 of
287292
// https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna2-white-paper.pdf
288-
static const ChipDetails mi250xChip = {220};
289-
static const ChipDetails mi250Chip = {208};
290-
static const ChipDetails mi210Chip = {104};
293+
static const ChipDetails mi250xChip = {220, "mi250x"};
294+
static const ChipDetails mi250Chip = {208, "mi250"};
295+
static const ChipDetails mi210Chip = {104, "mi210"};
291296

292297
// "AMD CDNA Architecture Compute Units" in Page 5 of
293298
// https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna-white-paper.pdf
294-
static const ChipDetails mi100Chip = {120};
299+
static const ChipDetails mi100Chip = {120, "mi100"};
295300

296-
static const ChipDetails rx7900xtxChip = {96};
297-
static const ChipDetails rx7900xtChip = {84};
298-
static const ChipDetails rx7800xtChip = {60};
299-
static const ChipDetails rx7700xtChip = {54};
301+
static const ChipDetails rx7900xtxChip = {96, "rx7900xtx"};
302+
static const ChipDetails rx7900xtChip = {84, "rx7900xt"};
303+
static const ChipDetails rx7800xtChip = {60, "rx7800xt"};
304+
static const ChipDetails rx7700xtChip = {54, "rx7700xt"};
300305

301306
// See https://llvm.org/docs/AMDGPUUsage.html#processors for gfxN to
302307
// cdnaN/rdnaN mapping.
303-
304308
return llvm::StringSwitch<std::optional<TargetDetails>>(target.lower())
305309
.Case("mi300x", TargetDetails{cdna3Wgp, &mi300xChip})
306310
.Case("mi300a", TargetDetails{cdna3Wgp, &mi300aChip})

0 commit comments

Comments
 (0)