99#include < optional>
1010#include " iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h"
1111#include " iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUEnums.h"
12+ #include " llvm/ADT/DenseMap.h"
1213#include " llvm/ADT/STLExtras.h"
1314#include " llvm/ADT/StringSwitch.h"
1415#include " mlir/IR/Attributes.h"
@@ -61,6 +62,18 @@ struct WgpDetails {
6162struct ChipDetails {
6263 uint32_t wgpCount;
6364 std::optional<StringRef> sku;
65+ // Aggregate chip-level bandwidth in TB/s.
66+ std::optional<float > peakMemoryBandwidthTBs;
67+ // Optional per-data-type compute performance (TFLOPs/s).
68+ llvm::SmallDenseMap<ComputeBitwidths, float > peakPerfTFLOPs;
69+
70+ ChipDetails (
71+ uint32_t wgp, std::optional<llvm::StringRef> s = std::nullopt ,
72+ std::optional<float > bw = std::nullopt ,
73+ std::initializer_list<llvm::detail::DenseMapPair<ComputeBitwidths, float >>
74+ perf = {})
75+ : wgpCount(wgp), sku(s), peakMemoryBandwidthTBs(bw),
76+ peakPerfTFLOPs (perf) {}
6477};
6578
6679// Full target details
@@ -139,8 +152,26 @@ TargetAttr createTargetAttr(const TargetDetails &details, StringRef arch,
139152 auto skuAttr = details.chip ->sku
140153 ? StringAttr::get (context, *details.chip ->sku )
141154 : StringAttr{};
155+
156+ FloatAttr peakMemoryBandwidthAttr =
157+ details.chip ->peakMemoryBandwidthTBs
158+ ? FloatAttr::get (Float32Type::get (context),
159+ *details.chip ->peakMemoryBandwidthTBs )
160+ : FloatAttr{};
161+
162+ DictionaryAttr peakPerfTFLOPsAttr = {};
163+ if (!details.chip ->peakPerfTFLOPs .empty ()) {
164+ SmallVector<NamedAttribute> attributes = llvm::map_to_vector (
165+ details.chip ->peakPerfTFLOPs , [&](const auto &pair) {
166+ return NamedAttribute (
167+ stringifyComputeBitwidths (pair.first ),
168+ FloatAttr::get (Float32Type::get (context), pair.second ));
169+ });
170+ peakPerfTFLOPsAttr = DictionaryAttr::get (context, attributes);
171+ }
142172 targetChip = TargetChipAttr::get (context, details.chip ->wgpCount , skuAttr,
143- DictionaryAttr{});
173+ peakMemoryBandwidthAttr,
174+ peakPerfTFLOPsAttr, DictionaryAttr{});
144175 }
145176
146177 return TargetAttr::get (context, arch, features, targetWgp, targetChip);
@@ -424,20 +455,73 @@ std::optional<TargetDetails> getAMDGPUTargetDetails(StringRef target) {
424455
425456 // "AMD Instinct MI300 Series Product Offerings" in Page 23 of
426457 // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf
427- static const ChipDetails mi300xChip = {304 , " mi300x" };
428- static const ChipDetails mi300aChip = {228 , " mi300a" };
429- static const ChipDetails mi308xChip = {80 , " mi308x" };
430- static const ChipDetails mi325xChip = {304 , " mi325x" };
458+ static const ChipDetails mi300xChip = {304 ,
459+ " mi300x" ,
460+ 5 .3f ,
461+ {{ComputeBitwidths::FP32, 163 .4f },
462+ {ComputeBitwidths::FP16, 1307 .4f },
463+ {ComputeBitwidths::Int8, 2614 .9f },
464+ {ComputeBitwidths::FP8, 2614 .9f }}};
465+
466+ static const ChipDetails mi300aChip = {228 ,
467+ " mi300a" ,
468+ 5 .3f ,
469+ {{ComputeBitwidths::FP32, 122 .6f },
470+ {ComputeBitwidths::FP16, 980 .6f },
471+ {ComputeBitwidths::Int8, 1961 .2f },
472+ {ComputeBitwidths::FP8, 1961 .2f }}};
473+
474+ static const ChipDetails mi308xChip = {
475+ 80 ,
476+ " mi308x" ,
477+ 5 .3f ,
      // Peak fp32 perf (in TFLOPs) estimated as:
      //   80 (CUs) * 4 (SIMDs per CU) * 1.42 (frequency, GHz)
      //     * (16*16*4) (GEMM shape) * 2 (mul+add) / 32 (instruction latency)
481+ {{ComputeBitwidths::FP32, 29 .0f },
482+ {ComputeBitwidths::FP16, 188 .4f },
483+ {ComputeBitwidths::FP8, 176 .8f },
484+ // Estimated int8 performance based on FP8
485+ {ComputeBitwidths::Int8, 176 .8f }}};
486+
487+ static const ChipDetails mi325xChip = {304 ,
488+ " mi325x" ,
489+ 5 .3f ,
490+ {{ComputeBitwidths::FP32, 163 .4f },
491+ {ComputeBitwidths::FP16, 1307 .4f },
492+ {ComputeBitwidths::Int8, 2614 .9f },
493+ {ComputeBitwidths::FP8, 2614 .9f }}};
431494
432495 // "AMD Instinct MI200 Series Accelerator Product Offerings" in Page 14 of
433496 // https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna2-white-paper.pdf
434- static const ChipDetails mi250xChip = {220 , " mi250x" };
435- static const ChipDetails mi250Chip = {208 , " mi250" };
436- static const ChipDetails mi210Chip = {104 , " mi210" };
497+ static const ChipDetails mi250xChip = {220 ,
498+ " mi250x" ,
499+ 3 .2f ,
500+ {{ComputeBitwidths::FP32, 95 .7f },
501+ {ComputeBitwidths::FP16, 383 .0f },
502+ {ComputeBitwidths::Int8, 383 .0f }}};
503+
504+ static const ChipDetails mi250Chip = {208 ,
505+ " mi250" ,
506+ 3 .2f ,
507+ {{ComputeBitwidths::FP32, 90 .5f },
508+ {ComputeBitwidths::FP16, 362 .1f },
509+ {ComputeBitwidths::Int8, 362 .1f }}};
510+ static const ChipDetails mi210Chip = {104 ,
511+ " mi210" ,
512+ 1 .6f ,
513+ {{ComputeBitwidths::FP32, 45 .3f },
514+ {ComputeBitwidths::FP16, 181 .0f },
515+ {ComputeBitwidths::Int8, 181 .0f }}};
437516
438517 // "AMD CDNA Architecture Compute Units" in Page 5 of
439518 // https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna-white-paper.pdf
440- static const ChipDetails mi100Chip = {120 , " mi100" };
519+ static const ChipDetails mi100Chip = {120 ,
520+ " mi100" ,
521+ 1 .23f ,
522+ {{ComputeBitwidths::FP32, 46 .1f },
523+ {ComputeBitwidths::FP16, 184 .6f },
524+ {ComputeBitwidths::Int8, 184 .6f }}};
441525
442526 // --- RDNA --- //
443527
@@ -450,10 +534,38 @@ std::optional<TargetDetails> getAMDGPUTargetDetails(StringRef target) {
450534
451535 // AMD RDNA4 architecture:
452536 // https://www.amd.com/en/newsroom/press-releases/2025-2-28-amd-unveils-next-generation-amd-rdna-4-architectu.html.
453- static const ChipDetails r9700Chip = {64 / 2 , " r9700" };
454- static const ChipDetails rx9070xtChip = {64 / 2 , " rx9070xt" };
455- static const ChipDetails rx9070Chip = {56 / 2 , " rx9070" };
456- static const ChipDetails rx9060xtChip = {32 / 2 , " rx9060xt" };
537+ // https://www.amd.com/en/products/graphics/workstations/radeon-ai-pro/ai-9000-series/amd-radeon-ai-pro-r9700.html
538+ static const ChipDetails r9700Chip = {64 / 2 ,
539+ " r9700" ,
540+ 0 .64f ,
541+ {{ComputeBitwidths::FP32, 47 .8f },
542+ {ComputeBitwidths::FP16, 191 .0f },
543+ {ComputeBitwidths::Int8, 383 .0f },
544+ {ComputeBitwidths::FP8, 383 .0f }}};
545+ // https://www.amd.com/en/products/graphics/desktops/radeon/9000-series/amd-radeon-rx-9070xt.html
546+ static const ChipDetails rx9070xtChip = {64 / 2 ,
547+ " rx9070xt" ,
548+ 0 .64f ,
549+ {{ComputeBitwidths::FP32, 48 .7f },
550+ {ComputeBitwidths::FP16, 195 .0f },
551+ {ComputeBitwidths::Int8, 389 .0f },
552+ {ComputeBitwidths::FP8, 389 .0f }}};
553+ // https://www.amd.com/en/products/graphics/desktops/radeon/9000-series/amd-radeon-rx-9070.html
554+ static const ChipDetails rx9070Chip = {56 / 2 ,
555+ " rx9070" ,
556+ 0 .64f ,
557+ {{ComputeBitwidths::FP32, 36 .1f },
558+ {ComputeBitwidths::FP16, 145 .0f },
559+ {ComputeBitwidths::Int8, 289 .0f },
560+ {ComputeBitwidths::FP8, 289 .0f }}};
561+ // https://www.amd.com/en/products/graphics/desktops/radeon/9000-series/amd-radeon-rx-9060xt.html
562+ static const ChipDetails rx9060xtChip = {32 / 2 ,
563+ " rx9060xt" ,
564+ 0 .32f ,
565+ {{ComputeBitwidths::FP32, 25 .6f },
566+ {ComputeBitwidths::FP16, 103 .0f },
567+ {ComputeBitwidths::Int8, 205 .0f },
568+ {ComputeBitwidths::FP8, 205 .0f }}};
457569
458570 // AMD RDNA3.
459571 static const ChipDetails rx7900xtxChip = {96 / 2 , " rx7900xtx" };
0 commit comments