-
Notifications
You must be signed in to change notification settings - Fork 1
[XeGPU] uArch definition (PR 1/N) #2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: tmp_main
Are you sure you want to change the base?
Changes from 15 commits
6bf71a8
6c73a92
6e0cfb8
de635bf
08812d2
7c77428
01f50ed
1a114fc
f22558a
d879e6c
6aa590d
2079478
31af9a7
fdfd433
78a462b
3e8e9e0
5127729
6d971ff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,4 +71,5 @@ def XeGPUBlocking: Pass<"xegpu-blocking"> { | |
]; | ||
} | ||
|
||
|
||
#endif // MLIR_DIALECT_XEGPU_TRANSFORMS_PASSES_TD |
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,227 @@ | ||
//===--- uArch.h ---------------------------------------*- C++ -*-===// | ||
|
||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
/// \file | ||
/// Xe2+ uArch definitions (PVC and BMG). | ||
/// | ||
// | ||
//===----------------------------------------------------------------------===// | ||
#ifndef MLIR_DIALECT_XEGPU_UTILS_INTEL_GPU_XE2_H | ||
#define MLIR_DIALECT_XEGPU_UTILS_INTEL_GPU_XE2_H | ||
|
||
#include "mlir/Dialect/XeGPU/uArch/uArch.h" | ||
#include "mlir/IR/BuiltinTypes.h" | ||
#include "mlir/IR/TypeUtilities.h" | ||
#include <map> | ||
#include <string> | ||
#include <vector> | ||
|
||
namespace mlir { | ||
namespace xegpu { | ||
namespace uArch { | ||
namespace Xe2Plus { | ||
struct XeCoreInfo { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can XeCoreInfo be used by other archs? |
||
uint32_t num_threads; | ||
SharedMemory shared_memory; | ||
uint32_t num_vector_units; | ||
uint32_t num_matrix_units; | ||
|
||
// Constructor | ||
XeCoreInfo(uint32_t num_threads, const SharedMemory &shared_memory, | ||
uint32_t num_vector_units, uint32_t num_matrix_units) | ||
: num_threads(num_threads), shared_memory(shared_memory), | ||
num_vector_units(num_vector_units), num_matrix_units(num_matrix_units) { | ||
} | ||
}; | ||
|
||
struct Xe2Plus : public uArch { | ||
XeCoreInfo xe_core; | ||
Xe2Plus( | ||
const std::string &archName, const std::string &archDescription, | ||
const XeCoreInfo &xeCore, | ||
const std::vector<uArchHierarchyComponent> &hierarchy = {}, | ||
const std::map<std::string, RegisterFileInfo> ®Info = {}, | ||
const std::vector<CacheInfo> &cacheInfo = {}, | ||
const std::map<std::string, std::shared_ptr<Instruction>> &instrs = {}, | ||
const std::vector<Restriction<> *> &restrs = {}) | ||
: uArch(archName, archDescription, hierarchy, regInfo, cacheInfo, instrs, | ||
restrs), | ||
xe_core(xeCore) {} | ||
}; | ||
|
||
// struct to represent DPAS instruction | ||
struct DPASInstruction : public Instruction, public MatrixOpInterface { | ||
// Range systolic_depth; | ||
// Range repreat_count; | ||
// Range execution_size; | ||
// std::map<std::string, uint32_t> ops_per_channel; | ||
// std::vector<std::vector<std::string>> supported_types; | ||
// std::map<std::string, std::map<std::string, std::vector<std::string>>> | ||
// matrix_size; | ||
|
||
// bool checkSupportedDPASTypes(mlir::Type dstType, mlir::Type src0Type, | ||
// mlir::Type src1Type, mlir::Type src2Type); | ||
|
||
DPASInstruction() | ||
: Instruction("dpas", // name | ||
"Dot Product Accumulate", // description | ||
"0xABCD", // opcode | ||
|
||
FunctionalUnit::Matrix, // functional_unit | ||
InstructionType::SIMD, // type | ||
InstructionScope::Subgroup, // scope | ||
UnitOfComputation::Matrix) // unit_of_computation | ||
{} | ||
|
||
// Override all virtuals from MatrixOpInterface | ||
virtual bool checkSupportedMMATypes(mlir::Type AType, mlir::Type BType, | ||
mlir::Type CType, | ||
mlir::Type DType) override; | ||
virtual std::vector<uint32_t> getSupportedM(mlir::Type type) override; | ||
virtual std::vector<uint32_t> getSupportedK(mlir::Type type) override; | ||
virtual std::vector<uint32_t> getSupportedN(mlir::Type type) override; | ||
virtual std::vector<std::pair<unsigned, unsigned>> | ||
getSupportedMatrix(mlir::Type type, MatrixType matrixType) override; | ||
|
||
}; | ||
|
||
// struct to represent Load2D/Store2D/Prefetch instruction | ||
struct LoadStorePrefetch2DInstruction : public Instruction { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. consider LoadStorePrefetch2DInstruction => 2dBlockIOInst There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
MemoryType memory_type; | ||
MemoryAccessType memory_access_type; | ||
// std::vector<std::string> supported_types; | ||
std::vector<uint32_t> supported_types_bitwidth; | ||
std::map<std::string, uint32_t> alignment; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is the string used for in alingment? the type name? |
||
std::vector<std::vector<uint32_t>> supported_tile_sizes; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tile => block to be consistent with the popular term "2dblockload" |
||
uint32_t min_surface_pitch; | ||
|
||
// Validate Array length restriction on a given tile | ||
bool validateArrayLenRestriction(std::vector<uint32_t> tile, | ||
uint32_t array_len, mlir::Type dataType) { | ||
|
||
Restriction<std::vector<uint32_t>, uint32_t, mlir::Type> | ||
width_array_len_restriction( | ||
tile, array_len, dataType, | ||
[](std::vector<uint32_t> tile, uint32_t array_len, | ||
mlir::Type dataType) { | ||
assert(tile.size() == 2); | ||
return tile[1] * array_len * | ||
(dataType.getIntOrFloatBitWidth() / 8) <= | ||
64; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: format issue. |
||
}); | ||
return width_array_len_restriction.validate(); | ||
} | ||
|
||
// Validate Surface Pitch restriction on a given tile | ||
bool validateSurfacePitchRestriction(std::vector<uint32_t> tile, | ||
uint32_t surfacePitch /*in bytes*/) { | ||
Restriction<std::vector<uint32_t>, uint32_t> surface_pitch_restriction( | ||
tile, surfacePitch, | ||
[](std::vector<uint32_t> tile, uint32_t surfacePitch) { | ||
assert(tile.size() == 2); | ||
return surfacePitch >= 64; | ||
}); | ||
return surface_pitch_restriction.validate(); | ||
} | ||
}; | ||
|
||
namespace PVCuArch { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: maybe we can remove PVCuArch namespace if it is not hard required. |
||
struct PVCuArch : public Xe2Plus { | ||
// Maintaines ownership of the instructions owned by PVUarch | ||
std::vector<std::shared_ptr<Instruction>> owned_instructions; | ||
PVCuArch() | ||
: Xe2Plus("pvc", // archName | ||
"Ponte Vecchio Architecture", // archDescription | ||
XeCoreInfo(8, SharedMemory(512 * 1024, 4), 8, 8), // xeCore | ||
{/* register_file_info */}, // Optional: empty | ||
{/* cache_info */}, // Optional: empty | ||
{/* instructions */}, // Optional: empty | ||
{/* restrictions */} // Optional: empty | ||
) { | ||
// Initialize uArchHierarchy | ||
this->uArch_hierarchy.push_back(uArchHierarchyComponent("thread", 0)); | ||
this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeCore", 8)); | ||
this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeSlice", 16)); | ||
this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeStack", 4)); | ||
this->uArch_hierarchy.push_back(uArchHierarchyComponent("gpu", 2)); | ||
// Intialize register file info | ||
// GRF | ||
this->register_file_info.emplace( | ||
"GRF", | ||
RegisterFileInfo(64 * 1024, // size in bits | ||
{"small", "large"}, // GRF modes | ||
{128, 256}, // registers per thread per mode | ||
0, // number of banks | ||
0 // bank size | ||
)); | ||
// Initialize cache info | ||
// L1 cache, XeCore level | ||
this->cache_info.push_back( | ||
CacheInfo(512 * 1024, 64, this->uArch_hierarchy[1])); | ||
// L3 cache, XeStack level | ||
this->cache_info.push_back( | ||
CacheInfo(512 * 1024, 64, this->uArch_hierarchy[3])); | ||
|
||
// Add the instructions | ||
auto dpas = std::make_shared<DPASInstruction>(); | ||
instructions.emplace(dpas->name, dpas); | ||
// instructions[dpas->name] = dpas.get(); | ||
owned_instructions.push_back(dpas); | ||
} | ||
}; | ||
} // namespace PVCuArch | ||
|
||
namespace BMGuArch { | ||
struct BMGuArch : public Xe2Plus { | ||
// Maintaines ownership of the instructions owned by PVUarch | ||
std::vector<std::shared_ptr<Instruction>> owned_instructions; | ||
BMGuArch() | ||
: Xe2Plus("bmg", // archName | ||
"Battlemage Architecture", // archDescription | ||
XeCoreInfo(8, SharedMemory(256 * 1024, 4), 8, 8), // xeCore | ||
{/* register_file_info */}, // Optional: empty | ||
{/* cache_info */}, // Optional: empty | ||
{/* instructions */}, // Optional: empty | ||
{/* restrictions */} // Optional: empty | ||
) { | ||
// Initialize uArchHierarchy | ||
this->uArch_hierarchy.push_back(uArchHierarchyComponent("thread", 0)); | ||
this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeCore", 8)); | ||
this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeSlice", 4)); | ||
this->uArch_hierarchy.push_back(uArchHierarchyComponent("XeStack", 5)); | ||
this->uArch_hierarchy.push_back(uArchHierarchyComponent("gpu", 1)); | ||
// Intialize register file info | ||
// GRF | ||
this->register_file_info["GRF"] = | ||
RegisterFileInfo(64 * 1024, // size in bits | ||
{"small", "large"}, // GRF modes | ||
{128, 256}, // registers per thread per mode | ||
0, // number of banks | ||
0 // bank size | ||
); | ||
// Initialize cache info | ||
// L1 cache, XeCore level | ||
this->cache_info.push_back( | ||
CacheInfo(256 * 1024, 64, this->uArch_hierarchy[1])); | ||
// L3 cache, XeStack level | ||
this->cache_info.push_back( | ||
CacheInfo(18 * 1024 * 1024, 256, this->uArch_hierarchy[3])); | ||
|
||
// Add the instructions | ||
auto dpas = std::make_shared<DPASInstruction>(); | ||
instructions.emplace(dpas->name, dpas); | ||
// instructions[dpas->name] = dpas.get(); | ||
owned_instructions.push_back(dpas); | ||
} | ||
}; | ||
} // namespace BMGuArch | ||
|
||
} // namespace Xe2Plus | ||
} // namespace uArch | ||
} // namespace xegpu | ||
} // namespace mlir | ||
|
||
#endif // MLIR_DIALECT_XEGPU_UTILS_INTEL_GPU_XE2_H |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I want to raise the following discussion points:
GPUModuleOp
, a finer granularity than ours. It may be a requirement of the GPU dialect. A potential benefit is that they may be able to support the case where a ModuleOp contains multiple GPUModuleOps, with each GPUModuleOp mapped to a different GPU from a different vendor.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sang Ik created this PR: llvm#147372. Is it similar to the pass here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree, this appears similar to xevm's attach-target pass, which is yet to be merged. After the merge, the `chip` string of the xevm target should be appropriate to query uarch info:
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do the XeVM targets have a sensible notion of an LLVM `triple` alongside that of `chip`? How about (LLVM) `features`? If so, it would probably make sense to have a `#xevm.target` attribute, à la the one proposed above, which implements the `LLVMTargetAttrInterface` from this WIP upstream PR:
https://github.com/llvm/llvm-project/pull/145899/files#diff-6c2503d165a7390c955c3c4fa4a76fd1991633b5ec597a1b1fd92731e1f3684dR572
I am in the process of making the `#nvvm.target` and `#rocdl.target` attributes implement this interface (in addition to the generic `#llvm.target` attr in that PR). If sensible, the XeVM dialect should probably (try to) mirror this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Given this in https://github.com/llvm/llvm-project/pull/147372/files#diff-3da7b8747032bb581f119fd49037d06706443c3c5db56d4d28cc74053a9b754dR284 in the Add xevm-attach-target transform pass PR, I take it there's a sensible triple.
I will have a look at making `xevm.target` also implement my interface.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks Chao, Rolf, Artem.
You are right: once the XeVM target attribute gets upstreamed, we should use it. This pass is here just to show the use case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍
On that note, I found that the `#xevm.target` attr itself already got merged: https://github.com/llvm/llvm-project/blob/b9d7513bf134febe72c05a04ff20f87191d7213a/mlir/include/mlir/Dialect/LLVMIR/XeVMOps.td#L521
We can get cracking!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this code reflect the agreement to use #xevm.target?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, Jianhui. I removed this pass. We are using #xevm.target to get target device name now.