|
| 1 | +//===--- IntelGpuXe2.h ---------------------------------------*- C++ -*-===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// |
| 9 | +/// \file |
| 10 | +/// Xe2 uArch definition. |
| 11 | +/// |
| 12 | +// |
| 13 | +//===----------------------------------------------------------------------===// |
| 14 | +#ifndef MLIR_DIALECT_XEGPU_UARCH_INTEL_GPU_XE2_H |
| 15 | +#define MLIR_DIALECT_XEGPU_UARCH_INTEL_GPU_XE2_H |
| 16 | + |
| 17 | +#include "mlir/Dialect/XeGPU/uArch/uArchInterfaces.h" |
| 18 | +#include "mlir/IR/BuiltinTypes.h" |
| 19 | +#include "mlir/IR/TypeUtilities.h" |
| 20 | +#include <map> |
| 21 | +#include <string> |
| 22 | +#include <vector> |
| 23 | + |
| 24 | +namespace mlir { |
| 25 | +namespace xegpu { |
| 26 | +namespace uArch { |
| 27 | +namespace Xe2Plus { |
| 28 | +struct XeCoreInfo { |
| 29 | + uint32_t num_threads; |
| 30 | + SharedMemory shared_memory; |
| 31 | + uint32_t num_vector_units; |
| 32 | + uint32_t num_matrix_units; |
| 33 | + |
| 34 | + // Constructor |
| 35 | + XeCoreInfo(uint32_t num_threads, const SharedMemory &shared_memory, |
| 36 | + uint32_t num_vector_units, uint32_t num_matrix_units) |
| 37 | + : num_threads(num_threads), shared_memory(shared_memory), |
| 38 | + num_vector_units(num_vector_units), num_matrix_units(num_matrix_units) { |
| 39 | + } |
| 40 | +}; |
| 41 | + |
| 42 | +struct Xe2Plus : public uArch { |
| 43 | + XeCoreInfo xe_core; |
| 44 | + Xe2Plus( |
| 45 | + const std::string &archName, const std::string &archDescription, |
| 46 | + const XeCoreInfo &xeCore, |
| 47 | + const std::vector<uArchHierarchyComponent> &hierarchy = {}, |
| 48 | + const std::map<std::string, RegisterFileInfo> ®Info = {}, |
| 49 | + const std::vector<CacheInfo> &cacheInfo = {}, |
| 50 | + const std::map<std::string, std::shared_ptr<Instruction>> &instrs = {}) |
| 51 | + : uArch(archName, archDescription, hierarchy, regInfo, cacheInfo, instrs), |
| 52 | + xe_core(xeCore) {} |
| 53 | +}; |
| 54 | + |
| 55 | +// struct to represent DPAS instruction |
| 56 | +struct DPASInstruction : public Instruction, public MMAInstructionInterface { |
| 57 | + DPASInstruction() |
| 58 | + : Instruction("dpas", // name |
| 59 | + "Dot Product Accumulate") // description |
| 60 | + {} |
| 61 | + |
| 62 | + // Override all virtuals from MatrixOpInterface |
| 63 | + virtual std::vector<std::pair<uint32_t, uint32_t>> |
| 64 | + getSupportedShapes(mlir::Type dataType, MMAOpndEnum matrixType) override; |
| 65 | + virtual std::vector<mlir::Type> |
| 66 | + getSupportedTypes(MLIRContext &context, MMAOpndEnum matrixType) override; |
| 67 | + virtual bool |
| 68 | + checkSupportedShapesAndTypes(std::pair<uint32_t, uint32_t> AShape, |
| 69 | + std::pair<uint32_t, uint32_t> BShape, |
| 70 | + std::pair<uint32_t, uint32_t> CShape, |
| 71 | + std::pair<uint32_t, uint32_t> DShape, |
| 72 | + mlir::Type AType, mlir::Type BType, |
| 73 | + mlir::Type CType, mlir::Type DType) override; |
| 74 | + virtual bool checkSupportedTypes(mlir::Type AType, mlir::Type BType, |
| 75 | + mlir::Type CType, mlir::Type DType) override; |
| 76 | + virtual bool validate(std::pair<uint32_t, uint32_t> AShape, |
| 77 | + std::pair<uint32_t, uint32_t> BShape, |
| 78 | + std::pair<uint32_t, uint32_t> CShape, |
| 79 | + std::pair<uint32_t, uint32_t> DShape, mlir::Type AType, |
| 80 | + mlir::Type BType, mlir::Type CType, |
| 81 | + mlir::Type DType) override; |
| 82 | + virtual std::vector<uint32_t> getSupportedM(mlir::Type type) override; |
| 83 | + virtual std::vector<uint32_t> getSupportedK(mlir::Type type) override; |
| 84 | + virtual std::vector<uint32_t> getSupportedN(mlir::Type type) override; |
| 85 | +}; |
| 86 | + |
| 87 | +namespace PVCuArch { |
| 88 | +struct PVCuArch : public Xe2Plus { |
| 89 | + // Maintaines ownership of the instructions owned by PVUarch |
| 90 | + std::vector<std::shared_ptr<Instruction>> owned_instructions; |
| 91 | + PVCuArch() |
| 92 | + : Xe2Plus("pvc", // archName |
| 93 | + "Ponte Vecchio Architecture", // archDescription |
| 94 | + XeCoreInfo(8, SharedMemory(512 * 1024, 4), 8, 8), // xeCore |
| 95 | + {/* register_file_info */}, // Optional: empty |
| 96 | + {/* cache_info */}, // Optional: empty |
| 97 | + {/* instructions */} // Optional: empty |
| 98 | + ) { |
| 99 | + // Initialize uArchHierarchy |
| 100 | + this->uArch_hierarchy.push_back(uArchHierarchyComponent("thread", 0)); |
| 101 | + this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeCore", 8)); |
| 102 | + this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeSlice", 16)); |
| 103 | + this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeStack", 4)); |
| 104 | + this->uArch_hierarchy.push_back(uArchHierarchyComponent("gpu", 2)); |
| 105 | + // Intialize register file info |
| 106 | + // GRF |
| 107 | + this->register_file_info.emplace( |
| 108 | + "GRF", |
| 109 | + RegisterFileInfo(64 * 1024, // size in bits |
| 110 | + {"small", "large"}, // GRF modes |
| 111 | + {128, 256}, // registers per thread per mode |
| 112 | + 0, // number of banks |
| 113 | + 0 // bank size |
| 114 | + )); |
| 115 | + // Initialize cache info |
| 116 | + // L1 cache, XeCore level |
| 117 | + this->cache_info.push_back( |
| 118 | + CacheInfo(512 * 1024, 64, this->uArch_hierarchy[1])); |
| 119 | + // L3 cache, XeStack level |
| 120 | + this->cache_info.push_back( |
| 121 | + CacheInfo(512 * 1024, 64, this->uArch_hierarchy[3])); |
| 122 | + |
| 123 | + // Add the instructions |
| 124 | + auto dpas = std::make_shared<DPASInstruction>(); |
| 125 | + instructions.emplace(dpas->getName(), dpas); |
| 126 | + // instructions[dpas->name] = dpas.get(); |
| 127 | + owned_instructions.push_back(dpas); |
| 128 | + } |
| 129 | +}; |
| 130 | +} // namespace PVCuArch |
| 131 | + |
| 132 | +namespace BMGuArch { |
| 133 | +struct BMGuArch : public Xe2Plus { |
| 134 | + // Maintaines ownership of the instructions owned by PVUarch |
| 135 | + std::vector<std::shared_ptr<Instruction>> owned_instructions; |
| 136 | + BMGuArch() |
| 137 | + : Xe2Plus("bmg", // archName |
| 138 | + "Battlemage Architecture", // archDescription |
| 139 | + XeCoreInfo(8, SharedMemory(256 * 1024, 4), 8, 8), // xeCore |
| 140 | + {/* register_file_info */}, // Optional: empty |
| 141 | + {/* cache_info */}, // Optional: empty |
| 142 | + {/* instructions */}, // Optional: empty |
| 143 | + {/* restrictions */} // Optional: empty |
| 144 | + ) { |
| 145 | + // Initialize uArchHierarchy |
| 146 | + this->uArch_hierarchy.push_back(uArchHierarchyComponent("thread", 0)); |
| 147 | + this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeCore", 8)); |
| 148 | + this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeSlice", 4)); |
| 149 | + this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeStack", 5)); |
| 150 | + this->uArch_hierarchy.push_back(uArchHierarchyComponent("gpu", 1)); |
| 151 | + // Intialize register file info |
| 152 | + // GRF |
| 153 | + this->register_file_info["GRF"] = |
| 154 | + RegisterFileInfo(64 * 1024, // size in bits |
| 155 | + {"small", "large"}, // GRF modes |
| 156 | + {128, 256}, // registers per thread per mode |
| 157 | + 0, // number of banks |
| 158 | + 0 // bank size |
| 159 | + ); |
| 160 | + // Initialize cache info |
| 161 | + // L1 cache, XeCore level |
| 162 | + this->cache_info.push_back( |
| 163 | + CacheInfo(256 * 1024, 64, this->uArch_hierarchy[1])); |
| 164 | + // L3 cache, XeStack level |
| 165 | + this->cache_info.push_back( |
| 166 | + CacheInfo(18 * 1024 * 1024, 256, this->uArch_hierarchy[3])); |
| 167 | + |
| 168 | + // Add the instructions |
| 169 | + auto dpas = std::make_shared<DPASInstruction>(); |
| 170 | + instructions.emplace(dpas->getName(), dpas); |
| 171 | + // instructions[dpas->name] = dpas.get(); |
| 172 | + owned_instructions.push_back(dpas); |
| 173 | + } |
| 174 | +}; |
| 175 | +} // namespace BMGuArch |
| 176 | + |
| 177 | +} // namespace Xe2Plus |
| 178 | +} // namespace uArch |
| 179 | +} // namespace xegpu |
| 180 | +} // namespace mlir |
| 181 | + |
| 182 | +#endif // MLIR_DIALECT_XEGPU_UARCH_INTEL_GPU_XE2_H |
0 commit comments