Skip to content

Commit b1d37c0

Browse files
committed
[uArch][XeGPU] Add XeGPU uArch definition.
The uArch infrastructure provides: - A set of data structures to represent a uArch and its necessary components (e.g., instructions, register files, caches). - A set of utility interfaces that are common to a family of ops (e.g., mma ops, 2DBlockIO ops). The implementation of these interfaces is provided by the specific instructions. Each family of ops provides these 5 common APIs; however, some families of ops may have more utility APIs. The 5 common APIs are: - getSupportedShapes - getSupportedTypes - checkSupportedShapesAndTypes - checkSupportedTypes - validate Add support for PVC and BMG architectures. Add support for the DPAS instruction.
1 parent db5f7dc commit b1d37c0

File tree

11 files changed

+753
-0
lines changed

11 files changed

+753
-0
lines changed
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
//===--- IntelGpuXe2.h ---------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// Xe2 uArch definition.
11+
///
12+
//
13+
//===----------------------------------------------------------------------===//
14+
#ifndef MLIR_DIALECT_XEGPU_UARCH_INTEL_GPU_XE2_H
15+
#define MLIR_DIALECT_XEGPU_UARCH_INTEL_GPU_XE2_H
16+
17+
#include "mlir/Dialect/XeGPU/uArch/uArchInterfaces.h"
18+
#include "mlir/IR/BuiltinTypes.h"
19+
#include "mlir/IR/TypeUtilities.h"
20+
#include <map>
21+
#include <string>
22+
#include <vector>
23+
24+
namespace mlir {
25+
namespace xegpu {
26+
namespace uArch {
27+
namespace Xe2Plus {
28+
struct XeCoreInfo {
29+
uint32_t num_threads;
30+
SharedMemory shared_memory;
31+
uint32_t num_vector_units;
32+
uint32_t num_matrix_units;
33+
34+
// Constructor
35+
XeCoreInfo(uint32_t num_threads, const SharedMemory &shared_memory,
36+
uint32_t num_vector_units, uint32_t num_matrix_units)
37+
: num_threads(num_threads), shared_memory(shared_memory),
38+
num_vector_units(num_vector_units), num_matrix_units(num_matrix_units) {
39+
}
40+
};
41+
42+
struct Xe2Plus : public uArch {
43+
XeCoreInfo xe_core;
44+
Xe2Plus(
45+
const std::string &archName, const std::string &archDescription,
46+
const XeCoreInfo &xeCore,
47+
const std::vector<uArchHierarchyComponent> &hierarchy = {},
48+
const std::map<std::string, RegisterFileInfo> &regInfo = {},
49+
const std::vector<CacheInfo> &cacheInfo = {},
50+
const std::map<std::string, std::shared_ptr<Instruction>> &instrs = {})
51+
: uArch(archName, archDescription, hierarchy, regInfo, cacheInfo, instrs),
52+
xe_core(xeCore) {}
53+
};
54+
55+
// struct to represent DPAS instruction
56+
struct DPASInstruction : public Instruction, public MMAInstructionInterface {
57+
DPASInstruction()
58+
: Instruction("dpas", // name
59+
"Dot Product Accumulate") // description
60+
{}
61+
62+
// Override all virtuals from MatrixOpInterface
63+
virtual std::vector<std::pair<uint32_t, uint32_t>>
64+
getSupportedShapes(mlir::Type dataType, MMAOpndEnum matrixType) override;
65+
virtual std::vector<mlir::Type>
66+
getSupportedTypes(MLIRContext &context, MMAOpndEnum matrixType) override;
67+
virtual bool
68+
checkSupportedShapesAndTypes(std::pair<uint32_t, uint32_t> AShape,
69+
std::pair<uint32_t, uint32_t> BShape,
70+
std::pair<uint32_t, uint32_t> CShape,
71+
std::pair<uint32_t, uint32_t> DShape,
72+
mlir::Type AType, mlir::Type BType,
73+
mlir::Type CType, mlir::Type DType) override;
74+
virtual bool checkSupportedTypes(mlir::Type AType, mlir::Type BType,
75+
mlir::Type CType, mlir::Type DType) override;
76+
virtual bool validate(std::pair<uint32_t, uint32_t> AShape,
77+
std::pair<uint32_t, uint32_t> BShape,
78+
std::pair<uint32_t, uint32_t> CShape,
79+
std::pair<uint32_t, uint32_t> DShape, mlir::Type AType,
80+
mlir::Type BType, mlir::Type CType,
81+
mlir::Type DType) override;
82+
virtual std::vector<uint32_t> getSupportedM(mlir::Type type) override;
83+
virtual std::vector<uint32_t> getSupportedK(mlir::Type type) override;
84+
virtual std::vector<uint32_t> getSupportedN(mlir::Type type) override;
85+
};
86+
87+
namespace PVCuArch {
88+
struct PVCuArch : public Xe2Plus {
89+
// Maintaines ownership of the instructions owned by PVUarch
90+
std::vector<std::shared_ptr<Instruction>> owned_instructions;
91+
PVCuArch()
92+
: Xe2Plus("pvc", // archName
93+
"Ponte Vecchio Architecture", // archDescription
94+
XeCoreInfo(8, SharedMemory(512 * 1024, 4), 8, 8), // xeCore
95+
{/* register_file_info */}, // Optional: empty
96+
{/* cache_info */}, // Optional: empty
97+
{/* instructions */} // Optional: empty
98+
) {
99+
// Initialize uArchHierarchy
100+
this->uArch_hierarchy.push_back(uArchHierarchyComponent("thread", 0));
101+
this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeCore", 8));
102+
this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeSlice", 16));
103+
this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeStack", 4));
104+
this->uArch_hierarchy.push_back(uArchHierarchyComponent("gpu", 2));
105+
// Intialize register file info
106+
// GRF
107+
this->register_file_info.emplace(
108+
"GRF",
109+
RegisterFileInfo(64 * 1024, // size in bits
110+
{"small", "large"}, // GRF modes
111+
{128, 256}, // registers per thread per mode
112+
0, // number of banks
113+
0 // bank size
114+
));
115+
// Initialize cache info
116+
// L1 cache, XeCore level
117+
this->cache_info.push_back(
118+
CacheInfo(512 * 1024, 64, this->uArch_hierarchy[1]));
119+
// L3 cache, XeStack level
120+
this->cache_info.push_back(
121+
CacheInfo(512 * 1024, 64, this->uArch_hierarchy[3]));
122+
123+
// Add the instructions
124+
auto dpas = std::make_shared<DPASInstruction>();
125+
instructions.emplace(dpas->getName(), dpas);
126+
// instructions[dpas->name] = dpas.get();
127+
owned_instructions.push_back(dpas);
128+
}
129+
};
130+
} // namespace PVCuArch
131+
132+
namespace BMGuArch {
133+
struct BMGuArch : public Xe2Plus {
134+
// Maintaines ownership of the instructions owned by PVUarch
135+
std::vector<std::shared_ptr<Instruction>> owned_instructions;
136+
BMGuArch()
137+
: Xe2Plus("bmg", // archName
138+
"Battlemage Architecture", // archDescription
139+
XeCoreInfo(8, SharedMemory(256 * 1024, 4), 8, 8), // xeCore
140+
{/* register_file_info */}, // Optional: empty
141+
{/* cache_info */}, // Optional: empty
142+
{/* instructions */}, // Optional: empty
143+
{/* restrictions */} // Optional: empty
144+
) {
145+
// Initialize uArchHierarchy
146+
this->uArch_hierarchy.push_back(uArchHierarchyComponent("thread", 0));
147+
this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeCore", 8));
148+
this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeSlice", 4));
149+
this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeStack", 5));
150+
this->uArch_hierarchy.push_back(uArchHierarchyComponent("gpu", 1));
151+
// Intialize register file info
152+
// GRF
153+
this->register_file_info["GRF"] =
154+
RegisterFileInfo(64 * 1024, // size in bits
155+
{"small", "large"}, // GRF modes
156+
{128, 256}, // registers per thread per mode
157+
0, // number of banks
158+
0 // bank size
159+
);
160+
// Initialize cache info
161+
// L1 cache, XeCore level
162+
this->cache_info.push_back(
163+
CacheInfo(256 * 1024, 64, this->uArch_hierarchy[1]));
164+
// L3 cache, XeStack level
165+
this->cache_info.push_back(
166+
CacheInfo(18 * 1024 * 1024, 256, this->uArch_hierarchy[3]));
167+
168+
// Add the instructions
169+
auto dpas = std::make_shared<DPASInstruction>();
170+
instructions.emplace(dpas->getName(), dpas);
171+
// instructions[dpas->name] = dpas.get();
172+
owned_instructions.push_back(dpas);
173+
}
174+
};
175+
} // namespace BMGuArch
176+
177+
} // namespace Xe2Plus
178+
} // namespace uArch
179+
} // namespace xegpu
180+
} // namespace mlir
181+
182+
#endif // MLIR_DIALECT_XEGPU_UARCH_INTEL_GPU_XE2_H

0 commit comments

Comments
 (0)