Skip to content

Commit 48babe1

Browse files
authored
[mlir][LLVM] Add LLVMAddrSpaceAttrInterface and NVVMMemorySpaceAttr (#157339)
This patch introduces the `LLVMAddrSpaceAttrInterface` for defining compatible LLVM address space attributes.

To test this interface, this patch also:
- Adds `NVVMMemorySpaceAttr`, implementing both `LLVMAddrSpaceAttrInterface` and `MemorySpaceAttrInterface`
- Converts the NVVM memory space constants from a plain C++ enum to an MLIR enum
- Updates all NVVM memory space references to use the new attribute system
- Adds support for NVVM memory spaces in ptr dialect translation

Example:
```mlir
llvm.func @nvvm_ptr_address_space(
    !ptr.ptr<#nvvm.memory_space<global>>,
    !ptr.ptr<#nvvm.memory_space<shared>>,
    !ptr.ptr<#nvvm.memory_space<constant>>,
    !ptr.ptr<#nvvm.memory_space<local>>,
    !ptr.ptr<#nvvm.memory_space<tensor>>,
    !ptr.ptr<#nvvm.memory_space<shared_cluster>>
  ) -> !ptr.ptr<#nvvm.memory_space<generic>>
```

Translating the above code to LLVM produces:
```llvm
declare ptr @nvvm_ptr_address_space(ptr addrspace(1), ptr addrspace(3), ptr addrspace(4), ptr addrspace(5), ptr addrspace(6), ptr addrspace(7))
```

To convert the memory space enum to the new enum class, use:
```bash
grep -r . -e "NVVMMemorySpace::kGenericMemorySpace" -l | xargs sed -i -e "s/NVVMMemorySpace::kGenericMemorySpace/NVVMMemorySpace::Generic/g"
grep -r . -e "NVVMMemorySpace::kGlobalMemorySpace" -l | xargs sed -i -e "s/NVVMMemorySpace::kGlobalMemorySpace/NVVMMemorySpace::Global/g"
grep -r . -e "NVVMMemorySpace::kSharedMemorySpace" -l | xargs sed -i -e "s/NVVMMemorySpace::kSharedMemorySpace/NVVMMemorySpace::Shared/g"
grep -r . -e "NVVMMemorySpace::kConstantMemorySpace" -l | xargs sed -i -e "s/NVVMMemorySpace::kConstantMemorySpace/NVVMMemorySpace::Constant/g"
grep -r . -e "NVVMMemorySpace::kLocalMemorySpace" -l | xargs sed -i -e "s/NVVMMemorySpace::kLocalMemorySpace/NVVMMemorySpace::Local/g"
grep -r . -e "NVVMMemorySpace::kTensorMemorySpace" -l | xargs sed -i -e "s/NVVMMemorySpace::kTensorMemorySpace/NVVMMemorySpace::Tensor/g"
grep -r . -e "NVVMMemorySpace::kSharedClusterMemorySpace" -l | xargs sed -i -e "s/NVVMMemorySpace::kSharedClusterMemorySpace/NVVMMemorySpace::SharedCluster/g"
```

NOTE: A future patch will add support for ROCDL; it wasn't added here to keep the patch small.
1 parent 9ee1f15 commit 48babe1

File tree

18 files changed

+220
-77
lines changed

18 files changed

+220
-77
lines changed

flang/lib/Optimizer/CodeGen/CodeGen.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3211,7 +3211,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
32113211

32123212
if (global.getDataAttr() &&
32133213
*global.getDataAttr() == cuf::DataAttribute::Shared)
3214-
g.setAddrSpace(mlir::NVVM::NVVMMemorySpace::kSharedMemorySpace);
3214+
g.setAddrSpace(
3215+
static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Shared));
32153216

32163217
rewriter.eraseOp(global);
32173218
return mlir::success();

flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,8 @@ static mlir::Value createAddressOfOp(mlir::ConversionPatternRewriter &rewriter,
221221
gpu::GPUModuleOp gpuMod,
222222
std::string &sharedGlobalName) {
223223
auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(
224-
rewriter.getContext(), mlir::NVVM::NVVMMemorySpace::kSharedMemorySpace);
224+
rewriter.getContext(),
225+
static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Shared));
225226
if (auto g = gpuMod.lookupSymbol<fir::GlobalOp>(sharedGlobalName))
226227
return mlir::LLVM::AddressOfOp::create(rewriter, loc, llvmPtrTy,
227228
g.getSymName());

mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class LLVM_Attr<string name, string attrMnemonic,
3030

3131
def LLVM_AddressSpaceAttr :
3232
LLVM_Attr<"AddressSpace", "address_space", [
33+
LLVM_LLVMAddrSpaceAttrInterface,
3334
DeclareAttrInterfaceMethods<MemorySpaceAttrInterface>
3435
]> {
3536
let summary = "LLVM address space";

mlir/include/mlir/Dialect/LLVMIR/LLVMAttrs.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,14 @@ class TBAANodeAttr : public Attribute {
9393
using cconv::CConv;
9494
using linkage::Linkage;
9595
using tailcallkind::TailCallKind;
96+
97+
namespace detail {
98+
/// Checks whether the given type is an LLVM type that can be loaded or stored.
99+
bool isValidLoadStoreImpl(Type type, ptr::AtomicOrdering ordering,
100+
std::optional<int64_t> alignment,
101+
const ::mlir::DataLayout *dataLayout,
102+
function_ref<InFlightDiagnostic()> emitError);
103+
} // namespace detail
96104
} // namespace LLVM
97105
} // namespace mlir
98106

mlir/include/mlir/Dialect/LLVMIR/LLVMInterfaces.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,24 @@ def LLVM_DIRecursiveTypeAttrInterface
533533
];
534534
}
535535

536+
def LLVM_LLVMAddrSpaceAttrInterface :
537+
AttrInterface<"LLVMAddrSpaceAttrInterface"> {
538+
let description = [{
539+
An interface for attributes that represent LLVM address spaces.
540+
Implementing attributes should provide access to the address space value
541+
as an unsigned integer.
542+
}];
543+
let cppNamespace = "::mlir::LLVM";
544+
let methods = [
545+
InterfaceMethod<
546+
/*description=*/"Returns the address space as an unsigned integer.",
547+
/*retTy=*/"unsigned",
548+
/*methodName=*/"getAddressSpace",
549+
/*args=*/(ins)
550+
>
551+
];
552+
}
553+
536554
def LLVM_TargetAttrInterface
537555
: AttrInterface<"TargetAttrInterface", [DLTIQueryInterface]> {
538556
let description = [{

mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.h"
2020
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
2121
#include "mlir/Dialect/LLVMIR/NVVMRequiresSMTraits.h"
22+
#include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h"
2223
#include "mlir/IR/Dialect.h"
2324
#include "mlir/IR/OpDefinition.h"
2425
#include "mlir/Interfaces/InferIntRangeInterface.h"
@@ -30,31 +31,23 @@
3031

3132
namespace mlir {
3233
namespace NVVM {
34+
/// Utility functions to compare NVVMMemorySpace with unsigned values.
35+
inline bool operator==(unsigned as, NVVMMemorySpace memSpace) {
36+
return as == static_cast<unsigned>(memSpace);
37+
}
38+
inline bool operator==(NVVMMemorySpace memSpace, unsigned as) {
39+
return static_cast<unsigned>(memSpace) == as;
40+
}
41+
inline bool operator!=(unsigned as, NVVMMemorySpace memSpace) {
42+
return as != static_cast<unsigned>(memSpace);
43+
}
44+
inline bool operator!=(NVVMMemorySpace memSpace, unsigned as) {
45+
return static_cast<unsigned>(memSpace) != as;
46+
}
3347

3448
// Shared memory has 128-bit alignment
3549
constexpr int kSharedMemoryAlignmentBit = 128;
3650

37-
/// NVVM memory space identifiers.
38-
enum NVVMMemorySpace {
39-
/// Generic memory space identifier.
40-
kGenericMemorySpace = 0,
41-
/// Global memory space identifier.
42-
kGlobalMemorySpace = 1,
43-
/// Shared memory space identifier.
44-
kSharedMemorySpace = 3,
45-
/// Constant memory space identifier.
46-
kConstantMemorySpace = 4,
47-
/// Local memory space identifier.
48-
kLocalMemorySpace = 5,
49-
/// Tensor memory space identifier.
50-
/// Tensor memory is available only in arch-accelerated
51-
/// variants from sm100 onwards.
52-
kTensorMemorySpace = 6,
53-
/// Distributed shared memory space identifier.
54-
/// Distributed shared memory is available only in sm90+.
55-
kSharedClusterMemorySpace = 7,
56-
};
57-
5851
/// A pair type of LLVM's Intrinsic ID and args (which are llvm values).
5952
/// This type is returned by the getIntrinsicIDAndArgs() methods.
6053
using IDArgPair =

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ include "mlir/IR/EnumAttr.td"
1717
include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
1818
include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
1919
include "mlir/Dialect/LLVMIR/NVVMRequiresSMTraits.td"
20+
include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.td"
2021
include "mlir/Interfaces/SideEffectInterfaces.td"
2122
include "mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.td"
2223
include "mlir/Interfaces/InferIntRangeInterface.td"
@@ -192,6 +193,40 @@ def CacheEvictionPriorityAttr : EnumAttr<NVVM_Dialect, CacheEvictionPriority,
192193
let assemblyFormat = "$value";
193194
}
194195

196+
// Memory Space enum definitions
197+
/// Generic memory space identifier.
198+
def MemSpaceGeneric : I32EnumCase<"Generic", 0, "generic">;
199+
/// Global memory space identifier.
200+
def MemSpaceGlobal : I32EnumCase<"Global", 1, "global">;
201+
/// Shared memory space identifier.
202+
def MemSpaceShared : I32EnumCase<"Shared", 3, "shared">;
203+
/// Constant memory space identifier.
204+
def MemSpaceConstant : I32EnumCase<"Constant", 4, "constant">;
205+
/// Local memory space identifier.
206+
def MemSpaceLocal : I32EnumCase<"Local", 5, "local">;
207+
/// Tensor memory space identifier.
208+
/// Tensor memory is available only in arch-accelerated
209+
/// variants from sm100 onwards.
210+
def MemSpaceTensor : I32EnumCase<"Tensor", 6, "tensor">;
211+
/// Distributed shared memory space identifier.
212+
/// Distributed shared memory is available only in sm90+.
213+
def MemSpaceSharedCluster : I32EnumCase<"SharedCluster", 7, "shared_cluster">;
214+
215+
def NVVMMemorySpace : I32Enum<"NVVMMemorySpace", "NVVM Memory Space",
216+
[MemSpaceGeneric, MemSpaceGlobal, MemSpaceShared,
217+
MemSpaceConstant, MemSpaceLocal, MemSpaceTensor,
218+
MemSpaceSharedCluster]> {
219+
let cppNamespace = "::mlir::NVVM";
220+
}
221+
222+
def NVVMMemorySpaceAttr :
223+
EnumAttr<NVVM_Dialect, NVVMMemorySpace, "memory_space", [
224+
DeclareAttrInterfaceMethods<LLVM_LLVMAddrSpaceAttrInterface>,
225+
DeclareAttrInterfaceMethods<MemorySpaceAttrInterface>
226+
]> {
227+
let assemblyFormat = "`<` $value `>`";
228+
}
229+
195230
//===----------------------------------------------------------------------===//
196231
// NVVM intrinsic operations
197232
//===----------------------------------------------------------------------===//
@@ -3592,7 +3627,7 @@ def NVVM_MapaOp: NVVM_Op<"mapa",
35923627
string llvmBuilder = [{
35933628
int addrSpace = llvm::cast<LLVMPointerType>(op.getA().getType()).getAddressSpace();
35943629

3595-
bool isSharedMemory = addrSpace == NVVM::NVVMMemorySpace::kSharedMemorySpace;
3630+
bool isSharedMemory = addrSpace == static_cast<int> (NVVM::NVVMMemorySpace::Shared);
35963631

35973632
auto intId = isSharedMemory? llvm::Intrinsic::nvvm_mapa_shared_cluster : llvm::Intrinsic::nvvm_mapa;
35983633
$res = createIntrinsicCall(builder, intId, {$a, $b});

mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -451,16 +451,14 @@ void mlir::configureGpuToNVVMTypeConverter(LLVMTypeConverter &converter) {
451451
converter, [](gpu::AddressSpace space) -> unsigned {
452452
switch (space) {
453453
case gpu::AddressSpace::Global:
454-
return static_cast<unsigned>(
455-
NVVM::NVVMMemorySpace::kGlobalMemorySpace);
454+
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global);
456455
case gpu::AddressSpace::Workgroup:
457-
return static_cast<unsigned>(
458-
NVVM::NVVMMemorySpace::kSharedMemorySpace);
456+
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared);
459457
case gpu::AddressSpace::Private:
460458
return 0;
461459
}
462460
llvm_unreachable("unknown address space enum value");
463-
return 0;
461+
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Generic);
464462
});
465463
// Lowering for MMAMatrixType.
466464
converter.addConversion([&](gpu::MMAMatrixType type) -> Type {
@@ -648,7 +646,7 @@ void mlir::populateGpuToNVVMConversionPatterns(
648646
GPUFuncOpLoweringOptions{
649647
/*allocaAddrSpace=*/0,
650648
/*workgroupAddrSpace=*/
651-
static_cast<unsigned>(NVVM::NVVMMemorySpace::kSharedMemorySpace),
649+
static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared),
652650
StringAttr::get(&converter.getContext(),
653651
NVVM::NVVMDialect::getKernelFuncAttrName()),
654652
StringAttr::get(&converter.getContext(),

mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -405,16 +405,14 @@ struct ConvertNVGPUToNVVMPass
405405
converter, [](gpu::AddressSpace space) -> unsigned {
406406
switch (space) {
407407
case gpu::AddressSpace::Global:
408-
return static_cast<unsigned>(
409-
NVVM::NVVMMemorySpace::kGlobalMemorySpace);
408+
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global);
410409
case gpu::AddressSpace::Workgroup:
411-
return static_cast<unsigned>(
412-
NVVM::NVVMMemorySpace::kSharedMemorySpace);
410+
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared);
413411
case gpu::AddressSpace::Private:
414412
return 0;
415413
}
416414
llvm_unreachable("unknown address space enum value");
417-
return 0;
415+
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Generic);
418416
});
419417
/// device-side async tokens cannot be materialized in nvvm. We just
420418
/// convert them to a dummy i32 type in order to easily drop them during
@@ -677,7 +675,7 @@ struct NVGPUAsyncCopyLowering
677675
adaptor.getSrcIndices());
678676
// Intrinsics takes a global pointer so we need an address space cast.
679677
auto srcPointerGlobalType = LLVM::LLVMPointerType::get(
680-
op->getContext(), NVVM::NVVMMemorySpace::kGlobalMemorySpace);
678+
op->getContext(), static_cast<unsigned>(NVVM::NVVMMemorySpace::Global));
681679
scrPtr = LLVM::AddrSpaceCastOp::create(b, srcPointerGlobalType, scrPtr);
682680
int64_t dstElements = adaptor.getDstElements().getZExtValue();
683681
int64_t sizeInBytes =

mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,16 +71,14 @@ void transform::ApplyGPUToNVVMConversionPatternsOp::populatePatterns(
7171
llvmTypeConverter, [](AddressSpace space) -> unsigned {
7272
switch (space) {
7373
case AddressSpace::Global:
74-
return static_cast<unsigned>(
75-
NVVM::NVVMMemorySpace::kGlobalMemorySpace);
74+
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Global);
7675
case AddressSpace::Workgroup:
77-
return static_cast<unsigned>(
78-
NVVM::NVVMMemorySpace::kSharedMemorySpace);
76+
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared);
7977
case AddressSpace::Private:
8078
return 0;
8179
}
8280
llvm_unreachable("unknown address space enum value");
83-
return 0;
81+
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Generic);
8482
});
8583
// Used in GPUToNVVM/WmmaOpsToNvvm.cpp so attaching here for now.
8684
// TODO: We should have a single to_nvvm_type_converter.

0 commit comments

Comments
 (0)