diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index ae14d74f2d915..23a40b8f7c32a 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -1221,12 +1221,34 @@ void CGNVCUDARuntime::createOffloadingEntries() { ? static_cast(llvm::offloading::OffloadGlobalNormalized) : 0); if (I.Flags.getKind() == DeviceVarFlags::Variable) { - llvm::offloading::emitOffloadingEntry( - M, I.Var, getDeviceSideName(I.D), VarSize, - (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry - : llvm::offloading::OffloadGlobalEntry) | - Flags, - /*Data=*/0, Section); + // TODO: Update the offloading entries struct to avoid this indirection. + if (I.Flags.isManaged()) { + assert(I.Var->getName().ends_with(".managed") && + "HIP managed variables not transformed"); + + // Create a struct to contain the two variables. + auto *ManagedVar = M.getNamedGlobal( + I.Var->getName().drop_back(StringRef(".managed").size())); + llvm::Constant *StructData[] = {ManagedVar, I.Var}; + llvm::Constant *Initializer = llvm::ConstantStruct::get( + llvm::offloading::getManagedTy(M), StructData); + auto *Struct = new llvm::GlobalVariable( + M, llvm::offloading::getManagedTy(M), + /*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage, Initializer, + I.Var->getName(), /*InsertBefore=*/nullptr, + llvm::GlobalVariable::NotThreadLocal, + M.getDataLayout().getDefaultGlobalsAddressSpace()); + + llvm::offloading::emitOffloadingEntry( + M, Struct, getDeviceSideName(I.D), VarSize, + llvm::offloading::OffloadGlobalManagedEntry | Flags, + /*Data=*/static_cast(I.Var->getAlignment()), Section); + } else { + llvm::offloading::emitOffloadingEntry( + M, I.Var, getDeviceSideName(I.D), VarSize, + llvm::offloading::OffloadGlobalEntry | Flags, + /*Data=*/0, Section); + } } else if (I.Flags.getKind() == DeviceVarFlags::Surface) { llvm::offloading::emitOffloadingEntry( M, I.Var, getDeviceSideName(I.D), VarSize, diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu 
b/clang/test/CodeGenCUDA/offloading-entries.cu index 259e3324e8ac9..d46a25969e3ec 100644 --- a/clang/test/CodeGenCUDA/offloading-entries.cu +++ b/clang/test/CodeGenCUDA/offloading-entries.cu @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*" // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \ // RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \ // RUN: --check-prefix=CUDA %s @@ -14,50 +14,68 @@ #include "Inputs/cuda.h" +#define __managed__ __attribute__((managed)) + //. +// CUDA: @managed = global i32 undef, align 4 // CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1 // CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1 // CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1 // CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1 // CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1 // CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1 -// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 -// CUDA: 
@.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1 -// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 -// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1 +// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1 +// CUDA: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed, ptr @.offloading.entry_name.3, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1 +// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 +// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.4, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1 +// CUDA: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 +// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.5, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1 //. 
+// HIP: @managed.managed = global i32 0, align 4 +// HIP: @managed = externally_initialized global ptr null // HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1 // HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1 // HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1 // HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1 // HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1 // HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1 -// HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 -// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1 -// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 -// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries", align 1 +// HIP: @managed.managed.3 = private constant %struct.__managed_var { ptr @managed, ptr @managed.managed } +// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1 +// HIP: @.offloading.entry.managed = weak 
constant %struct.__tgt_offload_entry { ptr @managed.managed.3, ptr @.offloading.entry_name.4, i64 4, i32 1, i32 4 }, section "hip_offloading_entries", align 1 +// HIP: @.offloading.entry_name.5 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 +// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.5, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1 +// HIP: @.offloading.entry_name.6 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 +// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.6, i64 4, i32 3, i32 1 }, section "hip_offloading_entries", align 1 //. +// CUDA-COFF: @managed = dso_local global i32 undef, align 4 // CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1 // CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 // CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1 // CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 // CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1 // CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 -// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] 
c"surf\00", section ".llvm.rodata.offloading", align 1 -// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1 -// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 -// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1 +// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1 +// CUDA-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed, ptr @.offloading.entry_name.3, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 +// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 +// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.4, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1 +// CUDA-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 +// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.5, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1 //. 
+// HIP-COFF: @managed.managed = dso_local global i32 0, align 4 +// HIP-COFF: @managed = dso_local externally_initialized global ptr null // HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1 // HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 // HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1 // HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 // HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1 // HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 -// HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 -// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1 -// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 -// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1 +// HIP-COFF: @managed.managed.3 = private constant %struct.__managed_var { ptr @managed, ptr @managed.managed } +// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [8 x 
i8] c"managed\00", section ".llvm.rodata.offloading", align 1 +// HIP-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed.managed.3, ptr @.offloading.entry_name.4, i64 4, i32 1, i32 4 }, section "hip_offloading_entries$OE", align 1 +// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 +// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.5, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1 +// HIP-COFF: @.offloading.entry_name.6 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 +// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.6, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1 //. // CUDA-LABEL: @_Z18__device_stub__foov( // CUDA-NEXT: entry: @@ -91,6 +109,7 @@ __global__ void foo() {} __device__ int var = 1; const __device__ int constant = 1; extern __device__ int external; +__device__ __managed__ int managed = 0; // CUDA-LABEL: @_Z21__device_stub__kernelv( // CUDA-NEXT: entry: @@ -137,28 +156,3 @@ template struct __attribute__((device_builtin_texture_type)) texture : public textureReference {}; texture tex; -//. -// CUDA: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name} -// CUDA: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1} -// CUDA: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2} -// CUDA: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3} -// CUDA: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4} -//. 
-// HIP: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name} -// HIP: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1} -// HIP: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2} -// HIP: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3} -// HIP: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4} -//. -// CUDA-COFF: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name} -// CUDA-COFF: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1} -// CUDA-COFF: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2} -// CUDA-COFF: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3} -// CUDA-COFF: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4} -//. -// HIP-COFF: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name} -// HIP-COFF: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1} -// HIP-COFF: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2} -// HIP-COFF: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3} -// HIP-COFF: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4} -//. diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c index f553f20f7ee89..7f96f629e9127 100644 --- a/clang/test/Driver/linker-wrapper-image.c +++ b/clang/test/Driver/linker-wrapper-image.c @@ -87,7 +87,7 @@ // CUDA-NEXT: br i1 %1, label %while.entry, label %while.end // CUDA: while.entry: -// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %12, %if.end ] +// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %13, %if.end ] // CUDA-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0 // CUDA-NEXT: %addr = load ptr, ptr %2, align 8 // CUDA-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1 @@ -125,7 +125,11 @@ // CUDA-NEXT: br label %if.end // CUDA: sw.managed: -// CUDA-NEXT: br label %if.end +// CUDA-NEXT: %managed.addr = load ptr, ptr %addr, align 8 +// CUDA-NEXT: %12 = getelementptr inbounds ptr, ptr %addr, i64 1 +// 
CUDA-NEXT: %managed.addr2 = load ptr, ptr %12, align 8 +// CUDA-NEXT: call void @__cudaRegisterManagedVar(ptr %0, ptr %managed.addr, ptr %managed.addr2, ptr %name, i64 %size, i32 %textype) +// CUDA-NEXT: br label %if.end // CUDA: sw.surface: // CUDA-NEXT: br label %if.end @@ -134,9 +138,9 @@ // CUDA-NEXT: br label %if.end // CUDA: if.end: -// CUDA-NEXT: %12 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1 -// CUDA-NEXT: %13 = icmp eq ptr %12, @__stop_cuda_offloading_entries -// CUDA-NEXT: br i1 %13, label %while.end, label %while.entry +// CUDA-NEXT: %13 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1 +// CUDA-NEXT: %14 = icmp eq ptr %13, @__stop_cuda_offloading_entries +// CUDA-NEXT: br i1 %14, label %while.end, label %while.entry // CUDA: while.end: // CUDA-NEXT: ret void @@ -187,7 +191,7 @@ // HIP-NEXT: br i1 %1, label %while.entry, label %while.end // HIP: while.entry: -// HIP-NEXT: %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %12, %if.end ] +// HIP-NEXT: %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %13, %if.end ] // HIP-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0 // HIP-NEXT: %addr = load ptr, ptr %2, align 8 // HIP-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1 @@ -225,7 +229,11 @@ // HIP-NEXT: br label %if.end // HIP: sw.managed: -// HIP-NEXT: br label %if.end +// HIP-NEXT: %managed.addr = load ptr, ptr %addr, align 8 +// HIP-NEXT: %12 = getelementptr inbounds ptr, ptr %addr, i64 1 +// HIP-NEXT: %managed.addr2 = load ptr, ptr %12, align 8 +// HIP-NEXT: call void @__hipRegisterManagedVar(ptr %0, ptr %managed.addr, ptr %managed.addr2, ptr %name, i64 %size, i32 %textype) +// HIP-NEXT: br label %if.end // HIP: sw.surface: // HIP-NEXT: call void @__hipRegisterSurface(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %textype, i32 %extern) @@ -236,9 +244,9 @@ // HIP-NEXT: br label %if.end // HIP: 
if.end: -// HIP-NEXT: %12 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1 -// HIP-NEXT: %13 = icmp eq ptr %12, @__stop_hip_offloading_entries -// HIP-NEXT: br i1 %13, label %while.end, label %while.entry +// HIP-NEXT: %13 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1 +// HIP-NEXT: %14 = icmp eq ptr %13, @__stop_hip_offloading_entries +// HIP-NEXT: br i1 %14, label %while.end, label %while.entry // HIP: while.end: // HIP-NEXT: ret void diff --git a/llvm/include/llvm/Frontend/Offloading/Utility.h b/llvm/include/llvm/Frontend/Offloading/Utility.h index f0bde5d81ef6d..ddcf0a946d635 100644 --- a/llvm/include/llvm/Frontend/Offloading/Utility.h +++ b/llvm/include/llvm/Frontend/Offloading/Utility.h @@ -55,6 +55,10 @@ enum OffloadEntryKindFlag : uint32_t { /// globals that will be registered with the offloading runtime. StructType *getEntryTy(Module &M); +/// Returns the struct type we store the two pointers for CUDA / HIP managed +/// variables in. Necessary until we widen the offload entry struct. +StructType *getManagedTy(Module &M); + /// Create an offloading section struct used to register this global at /// runtime. /// diff --git a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp index d616b4058b7bb..d3cb5346f4ba5 100644 --- a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp +++ b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp @@ -353,6 +353,16 @@ Function *createRegisterGlobalsFunction(Module &M, bool IsHIP, FunctionCallee RegVar = M.getOrInsertFunction( IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy); + // Get the __cudaRegisterManagedVar function declaration. + FunctionType *RegManagedVarTy = FunctionType::get(Type::getVoidTy(C), {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, getSizeTTy(M), Type::getInt32Ty(C)}, /*isVarArg=*/false); + FunctionCallee RegManagedVar = M.getOrInsertFunction( IsHIP ?
"__hipRegisterManagedVar" : "__cudaRegisterManagedVar", + RegManagedVarTy); + // Get the __cudaRegisterSurface function declaration. FunctionType *RegSurfaceTy = FunctionType::get(Type::getVoidTy(C), @@ -466,6 +476,12 @@ Function *createRegisterGlobalsFunction(Module &M, bool IsHIP, // Create managed variable registration code. Builder.SetInsertPoint(SwManagedBB); + auto *ManagedVar = Builder.CreateLoad(Int8PtrTy, Addr, "managed.addr"); + auto *ManagedAddr = Builder.CreateInBoundsGEP( + Int8PtrTy, Addr, {ConstantInt::get(Builder.getInt64Ty(), 1)}); + auto *Managed = Builder.CreateLoad(Int8PtrTy, ManagedAddr, "managed.addr"); + Builder.CreateCall(RegManagedVar, {RegGlobalsFn->arg_begin(), ManagedVar, + Managed, Name, Size, Data}); Builder.CreateBr(IfEndBB); Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalManagedEntry), SwManagedBB); diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp index 9e85ffbfe22d7..26dab0b22fa12 100644 --- a/llvm/lib/Frontend/Offloading/Utility.cpp +++ b/llvm/lib/Frontend/Offloading/Utility.cpp @@ -33,6 +33,16 @@ StructType *offloading::getEntryTy(Module &M) { return EntryTy; } +StructType *offloading::getManagedTy(Module &M) { + LLVMContext &C = M.getContext(); + StructType *StructTy = StructType::getTypeByName(C, "struct.__managed_var"); + if (!StructTy) + StructTy = llvm::StructType::create("struct.__managed_var", + PointerType::getUnqual(M.getContext()), + PointerType::getUnqual(M.getContext())); + return StructTy; +} + // TODO: Rework this interface to be more generic. std::pair offloading::getOffloadingEntryInitializer(Module &M, Constant *Addr,