jhuber6
diff --git a/‎clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp‎
Lines changed: 8 additions & 47 deletions b/‎clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp‎
Lines changed: 8 additions & 47 deletions
diff --git a/‎clang/lib/CodeGen/Targets/AMDGPU.cpp‎
Lines changed: 0 additions & 36 deletions b/‎clang/lib/CodeGen/Targets/AMDGPU.cpp‎
Lines changed: 0 additions & 36 deletions
diff --git a/‎clang/test/CodeGen/amdgpu-abi-version.c‎
Lines changed: 13 additions & 12 deletions b/‎clang/test/CodeGen/amdgpu-abi-version.c‎
Lines changed: 13 additions & 12 deletions
diff --git a/‎clang/test/CodeGen/amdgpu-address-spaces.cpp‎
Lines changed: 7 additions & 1 deletion b/‎clang/test/CodeGen/amdgpu-address-spaces.cpp‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu‎
Lines changed: 0 additions & 133 deletions b/‎clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu‎
Lines changed: 0 additions & 133 deletions
diff --git a/‎clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu‎
Lines changed: 0 additions & 34 deletions b/‎clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu‎
Lines changed: 0 additions & 34 deletions
@@ -62,62 +62,23 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
 /// Emit code based on Code Object ABI version.
 /// COV_4    : Emit code to use dispatch ptr
 /// COV_5+   : Emit code to use implicitarg ptr
-/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
-///            and use its value for COV_4 or COV_5+ approach. It is used for
-///            compiling device libraries in an ABI-agnostic way.
-///
-/// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by
-///       clang during compilation of user code.
 Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
   llvm::LoadInst *LD;
 
   auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
 
-  if (Cov == CodeObjectVersionKind::COV_None) {
-    StringRef Name = "__oclc_ABI_version";
-    auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
-    if (!ABIVersionC)
-      ABIVersionC = new llvm::GlobalVariable(
-          CGF.CGM.getModule(), CGF.Int32Ty, false,
-          llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
-          llvm::GlobalVariable::NotThreadLocal,
-          CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
-
-    // This load will be eliminated by the IPSCCP because it is constant
-    // weak_odr without externally_initialized. Either changing it to weak or
-    // adding externally_initialized will keep the load.
-    Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
-                                                      CGF.CGM.getIntAlign());
-
-    Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
-        ABIVersion,
-        llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
-
+  Value *GEP = nullptr;
+  if (Cov >= CodeObjectVersionKind::COV_5) {
     // Indexing the implicit kernarg segment.
-    Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
+    GEP = CGF.Builder.CreateConstGEP1_32(
         CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
-
-    // Indexing the HSA kernel_dispatch_packet struct.
-    Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
-        CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
-
-    auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
-    LD = CGF.Builder.CreateLoad(
-        Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
   } else {
-    Value *GEP = nullptr;
-    if (Cov >= CodeObjectVersionKind::COV_5) {
-      // Indexing the implicit kernarg segment.
-      GEP = CGF.Builder.CreateConstGEP1_32(
-          CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
-    } else {
-      // Indexing the HSA kernel_dispatch_packet struct.
-      GEP = CGF.Builder.CreateConstGEP1_32(
-          CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
-    }
-    LD = CGF.Builder.CreateLoad(
-        Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
+    // Indexing the HSA kernel_dispatch_packet struct.
+    GEP = CGF.Builder.CreateConstGEP1_32(CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF),
+                                         4 + Index * 2);
   }
+  LD = CGF.Builder.CreateLoad(
+      Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
 
   llvm::MDBuilder MDHelper(CGF.getLLVMContext());
   llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
 
@@ -305,8 +305,6 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
   void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
                                  CodeGenModule &CGM) const;
 
-  void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override;
-
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &M) const override;
   unsigned getOpenCLKernelCallingConv() const override;
@@ -414,40 +412,6 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
   }
 }
 
-/// Emits control constants used to change per-architecture behaviour in the
-/// AMDGPU ROCm device libraries.
-void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
-    CodeGen::CodeGenModule &CGM) const {
-  StringRef Name = "__oclc_ABI_version";
-  llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name);
-  if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
-    return;
-
-  if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
-      llvm::CodeObjectVersionKind::COV_None)
-    return;
-
-  auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
-  llvm::Constant *COV = llvm::ConstantInt::get(
-      Type, CGM.getTarget().getTargetOpts().CodeObjectVersion);
-
-  // It needs to be constant weak_odr without externally_initialized so that
-  // the load instuction can be eliminated by the IPSCCP.
-  auto *GV = new llvm::GlobalVariable(
-      CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
-      nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
-      CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
-  GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
-  GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);
-
-  // Replace any external references to this variable with the new global.
-  if (OriginalGV) {
-    OriginalGV->replaceAllUsesWith(GV);
-    GV->takeName(OriginalGV);
-    OriginalGV->eraseFromParent();
-  }
-}
-
 void AMDGPUTargetCodeGenInfo::setTargetAttributes(
     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
   if (requiresAMDGPUProtectedVisibility(D, GV)) {
 
@@ -1,23 +1,24 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 3
 // RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck %s
 
-//.
-// CHECK: @__oclc_ABI_version = external addrspace(4) global i32
-//.
 // CHECK-LABEL: define dso_local i32 @foo(
 // CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = icmp sge i32 [[TMP0]], 500
-// CHECK-NEXT:    [[TMP2:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP2]], i32 12
-// CHECK-NEXT:    [[TMP4:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i32 4
-// CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP1]], ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP5]]
-// CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG2:![0-9]+]], !invariant.load !3, !noundef !3
-// CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP7]] to i32
+// CHECK-NEXT:    [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG2:![0-9]+]], !invariant.load [[META3:![0-9]+]], !noundef [[META3]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP2]] to i32
 // CHECK-NEXT:    ret i32 [[CONV]]
 //
 int foo() { return __builtin_amdgcn_workgroup_size_x(); }
+//.
+// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+// CHECK: [[RNG2]] = !{i16 1, i16 1025}
+// CHECK: [[META3]] = !{}
+//.
@@ -29,7 +29,6 @@ int [[clang::address_space(999)]] bbb = 1234;
 // CHECK: @u = addrspace(5) global i32 undef, align 4
 // CHECK: @aaa = addrspace(6) global i32 1000, align 4
 // CHECK: @bbb = addrspace(999) global i32 1234, align 4
-// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
 //.
 // CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
 // CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -60,3 +59,10 @@ extern "C" [[clang::amdgpu_kernel]] void foo() {
   aaa = 0;
   bbb = 0;
 }
+//.
+// CHECK: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
+// CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
@@ -2,7 +2,6 @@
 // RUN:     -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=PRECOV5 %s
 
-
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
 // RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=COV5 %s
@@ -11,10 +10,6 @@
 // RUN:     -fcuda-is-device -mcode-object-version=6 -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=COV5 %s
 
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -mcode-object-version=none -emit-llvm -o - -x hip %s \
-// RUN:     | FileCheck -check-prefix=COVNONE %s
-
 #include "Inputs/cuda.h"
 
 // PRECOV5-LABEL: test_get_workgroup_size
@@ -35,35 +30,6 @@
 // COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
 // COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
 
-
-// COVNONE-LABEL: test_get_workgroup_size
-// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
-// COVNONE: [[ABI5_X:%.*]] = icmp sge i32 %{{.*}}, 500
-// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// COVNONE: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
-// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// COVNONE: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
-// COVNONE: select i1 [[ABI5_X]], ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
-// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-
-// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
-// COVNONE: [[ABI5_Y:%.*]] = icmp sge i32 %{{.*}}, 500
-// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// COVNONE: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
-// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// COVNONE: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
-// COVNONE: select i1 [[ABI5_Y]], ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
-// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-
-// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
-// COVNONE: [[ABI5_Z:%.*]] = icmp sge i32 %{{.*}}, 500
-// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// COVNONE: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
-// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// COVNONE: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
-// COVNONE: select i1 [[ABI5_Z]], ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
-// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-
 __device__ void test_get_workgroup_size(int d, int *out)
 {
   switch (d) {