Skip to content

Commit a26ebb4

Browse files
authored
[CIR][CUDA] Support for built-in CUDA surface type (#1455)
1 parent 8883ebe commit a26ebb4

File tree

6 files changed

+50
-13
lines changed

6 files changed

+50
-13
lines changed

clang/lib/CIR/CodeGen/CIRGenModule.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -570,13 +570,13 @@ bool CIRGenModule::shouldEmitCUDAGlobalVar(const VarDecl *global) const {
570570
// their device-side incarnations.
571571

572572
if (global->hasAttr<CUDAConstantAttr>() ||
573-
global->getType()->isCUDADeviceBuiltinSurfaceType() ||
574573
global->getType()->isCUDADeviceBuiltinTextureType()) {
575574
llvm_unreachable("NYI");
576575
}
577576

578577
return !langOpts.CUDAIsDevice || global->hasAttr<CUDADeviceAttr>() ||
579-
global->hasAttr<CUDASharedAttr>();
578+
global->hasAttr<CUDASharedAttr>() ||
579+
global->getType()->isCUDADeviceBuiltinSurfaceType();
580580
}
581581

582582
void CIRGenModule::emitGlobal(GlobalDecl gd) {
@@ -1122,10 +1122,8 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty,
11221122
}
11231123
}
11241124

1125-
// TODO(cir): LLVM codegen makes sure the result is of the correct type
1126-
// by issuing a address space cast.
1127-
if (entryCIRAS != cirAS)
1128-
llvm_unreachable("NYI");
1125+
// Address space check removed because it is unnecessary because CIR records
1126+
// address space info in types.
11291127

11301128
// (If global is requested for a definition, we always need to create a new
11311129
// global, not just return a bitcast.)
@@ -1496,7 +1494,8 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *d,
14961494
// __shared__ variables is not marked as externally initialized,
14971495
// because they must not be initialized.
14981496
if (linkage != cir::GlobalLinkageKind::InternalLinkage &&
1499-
(d->hasAttr<CUDADeviceAttr>())) {
1497+
(d->hasAttr<CUDADeviceAttr>() ||
1498+
d->getType()->isCUDADeviceBuiltinSurfaceType())) {
15001499
gv->setAttr(CUDAExternallyInitializedAttr::getMnemonic(),
15011500
CUDAExternallyInitializedAttr::get(&getMLIRContext()));
15021501
}

clang/lib/CIR/CodeGen/CIRGenTypes.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -352,10 +352,14 @@ mlir::Type CIRGenTypes::convertType(QualType T) {
352352
// 1. There is no SurfaceType on HIP,
353353
// 2. There is Texture memory on HIP but accessing the memory goes through
354354
// calls to the runtime. e.g. for a 2D: `tex2D<float>(tex, x, y);`
355-
if (astContext.getLangOpts().CUDA && astContext.getLangOpts().CUDAIsDevice) {
356-
if (Ty->isCUDADeviceBuiltinSurfaceType() ||
357-
Ty->isCUDADeviceBuiltinTextureType())
355+
if (astContext.getLangOpts().CUDAIsDevice) {
356+
if (T->isCUDADeviceBuiltinSurfaceType()) {
357+
if (mlir::Type Ty =
358+
CGM.getTargetCIRGenInfo().getCUDADeviceBuiltinSurfaceDeviceType())
359+
return Ty;
360+
} else if (T->isCUDADeviceBuiltinTextureType()) {
358361
llvm_unreachable("NYI");
362+
}
359363
}
360364

361365
if (const auto *recordType = dyn_cast<RecordType>(T))

clang/lib/CIR/CodeGen/TargetInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
12
#include "TargetInfo.h"
23
#include "ABIInfo.h"
34
#include "CIRGenCXXABI.h"
@@ -344,6 +345,11 @@ class NVPTXTargetCIRGenInfo : public TargetCIRGenInfo {
344345
public:
345346
NVPTXTargetCIRGenInfo(CIRGenTypes &cgt)
346347
: TargetCIRGenInfo(std::make_unique<NVPTXABIInfo>(cgt)) {}
348+
mlir::Type getCUDADeviceBuiltinSurfaceDeviceType() const override {
349+
// On the device side, texture reference is represented as an object handle
350+
// in 64-bit integer.
351+
return cir::IntType::get(&getABIInfo().CGT.getMLIRContext(), 64, true);
352+
}
347353
};
348354

349355
} // namespace

clang/lib/CIR/CodeGen/TargetInfo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,9 @@ class TargetCIRGenInfo {
120120
// kernels. They should reset the calling convention to OpenCLKernel,
121121
// which will be further resolved by getOpenCLKernelCallingConv().
122122
virtual void setCUDAKernelCallingConvention(const FunctionType *&ft) const {}
123-
123+
virtual mlir::Type getCUDADeviceBuiltinSurfaceDeviceType() const {
124+
return nullptr;
125+
}
124126
virtual ~TargetCIRGenInfo() {}
125127
};
126128

clang/lib/CIR/Dialect/IR/CIRAttrs.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -657,10 +657,10 @@ AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) {
657657
return Kind::offload_private;
658658
case LangAS::opencl_generic:
659659
return Kind::offload_generic;
660-
660+
case LangAS::cuda_constant:
661+
return Kind::offload_constant;
661662
case LangAS::opencl_global_device:
662663
case LangAS::opencl_global_host:
663-
case LangAS::cuda_constant:
664664
case LangAS::sycl_global:
665665
case LangAS::sycl_global_device:
666666
case LangAS::sycl_global_host:
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// REQUIRES: x86-registered-target
2+
// REQUIRES: nvptx-registered-target
3+
4+
// RUN: %clang_cc1 -fclangir -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck --check-prefix=DEVICE-LLVM %s
5+
// RUN: %clang_cc1 -fclangir -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-cir -o - %s | FileCheck --check-prefix=DEVICE-CIR %s
6+
// RUN: echo "GPU binary would be here" > %t
7+
// RUN: %clang_cc1 -fclangir -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s
8+
9+
struct surfaceReference {
10+
int desc;
11+
};
12+
13+
template <typename T, int dim = 1>
14+
struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {
15+
};
16+
17+
// Partial specialization over `void`.
18+
template<int dim>
19+
struct __attribute__((device_builtin_surface_type)) surface<void, dim> : public surfaceReference {
20+
};
21+
22+
surface<void, 2> surf;
23+
24+
// DEVICE-LLVM: @surf = addrspace(1) externally_initialized global i64 undef, align 4
25+
// DEVICE-CIR: cir.global external addrspace(offload_global) @surf = #cir.undef : !s64i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized}
26+
// HOST: @surf = global %"struct.surface<void, 2>" zeroinitializer, align 4

0 commit comments

Comments
 (0)