Skip to content

Commit 7d3737f

Browse files
authored
Revert "[NVPTX] Make ctor/dtor lowering always enabled in NVPTX (#126544)"
This reverts commit 3d9409f.
1 parent b969726 commit 7d3737f

File tree

12 files changed

+78
-28
lines changed

12 files changed

+78
-28
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9193,8 +9193,6 @@ def err_cuda_device_exceptions : Error<
91939193
def err_dynamic_var_init : Error<
91949194
"dynamic initialization is not supported for "
91959195
"__device__, __constant__, __shared__, and __managed__ variables">;
9196-
def err_cuda_ctor_dtor_attrs
9197-
: Error<"CUDA does not support global %0 for __device__ functions">;
91989196
def err_shared_var_init : Error<
91999197
"initialization is not supported for __shared__ variables">;
92009198
def err_cuda_vla : Error<

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,9 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
639639
CmdArgs.push_back(
640640
Args.MakeArgString("--plugin-opt=-mattr=" + llvm::join(Features, ",")));
641641

642+
// Enable ctor / dtor lowering for the direct / freestanding NVPTX target.
643+
CmdArgs.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});
644+
642645
// Add paths for the default clang library path.
643646
SmallString<256> DefaultLibPath =
644647
llvm::sys::path::parent_path(TC.getDriver().Dir);
@@ -723,8 +726,9 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
723726
/// toolchain.
724727
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
725728
const llvm::Triple &HostTriple,
726-
const ArgList &Args)
727-
: ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) {
729+
const ArgList &Args, bool Freestanding = false)
730+
: ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args),
731+
Freestanding(Freestanding) {
728732
if (CudaInstallation.isValid())
729733
getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
730734
// Lookup binaries into the driver directory, this is used to
@@ -736,7 +740,8 @@ NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
736740
/// system's default triple if not provided.
737741
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
738742
const ArgList &Args)
739-
: NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args) {}
743+
: NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args,
744+
/*Freestanding=*/true) {}
740745

741746
llvm::opt::DerivedArgList *
742747
NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
@@ -777,7 +782,13 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
777782

778783
void NVPTXToolChain::addClangTargetOptions(
779784
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
780-
Action::OffloadKind DeviceOffloadingKind) const {}
785+
Action::OffloadKind DeviceOffloadingKind) const {
786+
// If we are compiling with a standalone NVPTX toolchain we want to try to
787+
// mimic a standard environment as much as possible. So we enable lowering
788+
// ctor / dtor functions to global symbols that can be registered.
789+
if (Freestanding && !getDriver().isUsingLTO())
790+
CC1Args.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});
791+
}
781792

782793
bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
783794
const Option &O = A->getOption();

clang/lib/Driver/ToolChains/Cuda.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,8 @@ namespace toolchains {
132132
class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
133133
public:
134134
NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
135-
const llvm::Triple &HostTriple,
136-
const llvm::opt::ArgList &Args);
135+
const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args,
136+
bool Freestanding);
137137

138138
NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
139139
const llvm::opt::ArgList &Args);
@@ -179,6 +179,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
179179
protected:
180180
Tool *buildAssembler() const override; // ptxas.
181181
Tool *buildLinker() const override; // nvlink.
182+
183+
private:
184+
bool Freestanding = false;
182185
};
183186

184187
class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain {

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7484,15 +7484,6 @@ void Sema::ProcessDeclAttributeList(
74847484
}
74857485
}
74867486

7487-
// Do not permit 'constructor' or 'destructor' attributes on __device__ code.
7488-
if (getLangOpts().CUDAIsDevice && D->hasAttr<CUDADeviceAttr>() &&
7489-
(D->hasAttr<ConstructorAttr>() || D->hasAttr<DestructorAttr>()) &&
7490-
!getLangOpts().GPUAllowDeviceInit) {
7491-
Diag(D->getLocation(), diag::err_cuda_ctor_dtor_attrs)
7492-
<< (D->hasAttr<ConstructorAttr>() ? "constructors" : "destructors");
7493-
D->setInvalidDecl();
7494-
}
7495-
74967487
// Do this check after processing D's attributes because the attribute
74977488
// objc_method_family can change whether the given method is in the init
74987489
// family, and it can be applied after objc_designated_initializer. This is a

clang/test/Driver/cuda-cross-compiling.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,19 @@
5757

5858
// LINK: clang-nvlink-wrapper{{.*}}"-o" "a.out" "-arch" "sm_61"{{.*}}[[CUBIN:.+]].o
5959

60+
//
61+
// Test to ensure that we enable handling global constructors in a freestanding
62+
// Nvidia compilation.
63+
//
64+
// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_70 %s -### 2>&1 \
65+
// RUN: | FileCheck -check-prefix=LOWERING %s
66+
// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_70 -flto -c %s -### 2>&1 \
67+
// RUN: | FileCheck -check-prefix=LOWERING-LTO %s
68+
69+
// LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"
70+
// LOWERING: clang-nvlink-wrapper{{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"
71+
// LOWERING-LTO-NOT: "--nvptx-lower-global-ctor-dtor"
72+
6073
//
6174
// Test passing arguments directly to nvlink.
6275
//

clang/test/SemaCUDA/device-var-init.cu

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -485,12 +485,3 @@ void instantiate() {
485485
bar<NontrivialInitializer><<<1, 1>>>();
486486
// expected-note@-1 {{in instantiation of function template specialization 'bar<NontrivialInitializer>' requested here}}
487487
}
488-
489-
__device__ void *ptr1 = nullptr;
490-
__device__ void *ptr2 = ptr1;
491-
// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
492-
493-
__device__ [[gnu::constructor(101)]] void ctor() {}
494-
// expected-error@-1 {{CUDA does not support global constructors for __device__ functions}}
495-
__device__ [[gnu::destructor(101)]] void dtor() {}
496-
// expected-error@-1 {{CUDA does not support global destructors for __device__ functions}}

libc/cmake/modules/LLVMLibCTestRules.cmake

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,12 +560,14 @@ function(add_integration_test test_name)
560560
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
561561
target_link_options(${fq_build_target_name} PRIVATE
562562
${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS}
563-
-Wno-multi-gpu -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto -nostdlib -static
563+
-Wno-multi-gpu -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
564+
"-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static
564565
"-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}")
565566
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
566567
target_link_options(${fq_build_target_name} PRIVATE
567568
${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS}
568569
"-Wl,--suppress-stack-size-warning" -Wno-multi-gpu
570+
"-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1"
569571
"-Wl,-mllvm,-nvptx-emit-init-fini-kernel"
570572
-march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static
571573
"--cuda-path=${LIBC_CUDA_ROOT}")

libcxx/test/configs/nvptx-libc++-shared.cfg.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ config.substitutions.append(('%{link_flags}',
1010
'-nostdlib++ -startfiles -stdlib '
1111
'-L %{lib-dir} -lc++ -lc++abi '
1212
'-Wl,--suppress-stack-size-warning '
13+
'-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1 '
14+
'-Wl,-mllvm,-nvptx-emit-init-fini-kernel'
1315
))
1416
config.substitutions.append(('%{exec}',
1517
'%{executor} --no-parallelism'

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,11 @@
9191

9292
using namespace llvm;
9393

94+
static cl::opt<bool>
95+
LowerCtorDtor("nvptx-lower-global-ctor-dtor",
96+
cl::desc("Lower GPU ctor / dtors to globals on the device."),
97+
cl::init(false), cl::Hidden);
98+
9499
#define DEPOTNAME "__local_depot"
95100

96101
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
@@ -789,6 +794,22 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
789794
if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
790795
report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
791796

797+
// OpenMP supports NVPTX global constructors and destructors.
798+
bool IsOpenMP = M.getModuleFlag("openmp") != nullptr;
799+
800+
if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
801+
!LowerCtorDtor && !IsOpenMP) {
802+
report_fatal_error(
803+
"Module has a nontrivial global ctor, which NVPTX does not support.");
804+
return true; // error
805+
}
806+
if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
807+
!LowerCtorDtor && !IsOpenMP) {
808+
report_fatal_error(
809+
"Module has a nontrivial global dtor, which NVPTX does not support.");
810+
return true; // error
811+
}
812+
792813
// We need to call the parent's one explicitly.
793814
bool Result = AsmPrinter::doInitialization(M);
794815

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
; RUN: not --crash llc < %s -mtriple=nvptx -mcpu=sm_20 2>&1 | FileCheck %s
2+
3+
; Check that llc dies when given a nonempty global ctor.
4+
@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @foo, ptr null }]
5+
6+
; CHECK: ERROR: Module has a nontrivial global ctor
7+
define internal void @foo() {
8+
ret void
9+
}

0 commit comments

Comments
 (0)