Skip to content

Commit 809c370

Browse files
JamesL425lanza
authored andcommitted
[CIR][CUDA] support for device variables (#1394)
1 parent f0a74e0 commit 809c370

File tree

4 files changed

+53
-12
lines changed

4 files changed

+53
-12
lines changed

clang/lib/CIR/CodeGen/CIRGenModule.cpp

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,23 @@ const ABIInfo &CIRGenModule::getABIInfo() {
510510
return getTargetCIRGenInfo().getABIInfo();
511511
}
512512

513+
bool CIRGenModule::shouldEmitCUDAGlobalVar(const VarDecl *global) const {
514+
assert(langOpts.CUDA && "Should not be called by non-CUDA languages");
515+
// We need to emit host-side 'shadows' for all global
516+
// device-side variables because the CUDA runtime needs their
517+
// size and host-side address in order to provide access to
518+
// their device-side incarnations.
519+
520+
if (global->hasAttr<CUDAConstantAttr>() ||
521+
global->hasAttr<CUDASharedAttr>() ||
522+
global->getType()->isCUDADeviceBuiltinSurfaceType() ||
523+
global->getType()->isCUDADeviceBuiltinTextureType()) {
524+
llvm_unreachable("NYI");
525+
}
526+
527+
return !langOpts.CUDAIsDevice || global->hasAttr<CUDADeviceAttr>();
528+
}
529+
513530
void CIRGenModule::emitGlobal(GlobalDecl GD) {
514531
llvm::TimeTraceScope scope("build CIR Global", [&]() -> std::string {
515532
auto *ND = dyn_cast<NamedDecl>(GD.getDecl());
@@ -554,8 +571,10 @@ void CIRGenModule::emitGlobal(GlobalDecl GD) {
554571
}
555572
}
556573

557-
if (dyn_cast<VarDecl>(Global))
558-
llvm_unreachable("NYI");
574+
if (const auto *VD = dyn_cast<VarDecl>(Global)) {
575+
if (!shouldEmitCUDAGlobalVar(VD))
576+
return;
577+
}
559578
}
560579

561580
if (langOpts.OpenMP) {
@@ -599,7 +618,6 @@ void CIRGenModule::emitGlobal(GlobalDecl GD) {
599618
return;
600619
}
601620
} else {
602-
assert(!langOpts.CUDA && "NYI");
603621
const auto *VD = cast<VarDecl>(Global);
604622
assert(VD->isFileVarDecl() && "Cannot emit local var decl as global.");
605623
if (VD->isThisDeclarationADefinition() != VarDecl::Definition &&
@@ -1149,8 +1167,11 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef MangledName, mlir::Type Ty,
11491167

11501168
// External HIP managed variables needed to be recorded for transformation
11511169
// in both device and host compilations.
1152-
if (getLangOpts().CUDA)
1153-
assert(0 && "not implemented");
1170+
// External HIP managed variables needed to be recorded for transformation
1171+
// in both device and host compilations.
1172+
if (getLangOpts().CUDA && D && D->hasAttr<HIPManagedAttr>() &&
1173+
D->hasExternalStorage())
1174+
llvm_unreachable("NYI");
11541175
}
11551176

11561177
// TODO(cir): address space cast when needed for DAddrSpace.
@@ -1422,9 +1443,6 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *D,
14221443
// the device. [...]"
14231444
// CUDA B.2.2 "The __constant__ qualifier, optionally used together with
14241445
// __device__, declares a variable that: [...]
1425-
if (GV && getLangOpts().CUDA) {
1426-
assert(0 && "not implemented");
1427-
}
14281446

14291447
// Set initializer and finalize emission
14301448
CIRGenModule::setInitializer(GV, Init);
@@ -4012,9 +4030,17 @@ LangAS CIRGenModule::getGlobalVarAddressSpace(const VarDecl *D) {
40124030
llvm_unreachable("NYI");
40134031

40144032
if (langOpts.CUDA && langOpts.CUDAIsDevice) {
4015-
if (D && D->hasAttr<CUDASharedAttr>())
4016-
return LangAS::cuda_shared;
4017-
llvm_unreachable("NYI");
4033+
if (D) {
4034+
if (D->hasAttr<CUDAConstantAttr>())
4035+
return LangAS::cuda_constant;
4036+
if (D->hasAttr<CUDASharedAttr>())
4037+
return LangAS::cuda_shared;
4038+
if (D->hasAttr<CUDADeviceAttr>())
4039+
return LangAS::cuda_device;
4040+
if (D->getType().isConstQualified())
4041+
return LangAS::cuda_constant;
4042+
}
4043+
return LangAS::cuda_device;
40184044
}
40194045

40204046
if (langOpts.OpenMP)

clang/lib/CIR/CodeGen/CIRGenModule.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,9 @@ class CIRGenModule : public CIRGenTypeCache {
271271

272272
// Return whether structured convergence intrinsics should be generated for
273273
// this target.
274+
275+
bool shouldEmitCUDAGlobalVar(const VarDecl *global) const;
276+
274277
bool shouldEmitConvergenceTokens() const {
275278
// TODO: this shuld probably become unconditional once the controlled
276279
// convergence becomes the norm.

clang/lib/CIR/Dialect/IR/CIRAttrs.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,8 @@ AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) {
649649
// Local means local among the work-group (OpenCL) or block (CUDA).
650650
// All threads inside the kernel can access local memory.
651651
return Kind::offload_local;
652+
case LangAS::cuda_device:
653+
return Kind::offload_global;
652654
case LangAS::opencl_constant:
653655
return Kind::offload_constant;
654656
case LangAS::opencl_private:
@@ -658,7 +660,6 @@ AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) {
658660

659661
case LangAS::opencl_global_device:
660662
case LangAS::opencl_global_host:
661-
case LangAS::cuda_device:
662663
case LangAS::cuda_constant:
663664
case LangAS::sycl_global:
664665
case LangAS::sycl_global_device:
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#include "../Inputs/cuda.h"
2+
3+
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
4+
// RUN: -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
5+
// RUN: %s -o %t.cir
6+
// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
7+
8+
9+
__device__ int a;
10+
11+
// CIR-DEVICE: cir.global external addrspace(offload_global) @a = #cir.int<0> : !s32i {alignment = 4 : i64} loc(#loc3)

0 commit comments

Comments
 (0)