Skip to content

Commit e6e954c

Browse files
committed
[Driver] Add -f[no-]offload-uniform-block
Part of this patch (https://reviews.llvm.org/D155213) has already landed in amd-mainline-open; however, part of it was lost. As a result, -fno-offload-uniform-block has no effect on HIP kernels. This patch cherry-picks the missing part of 2855d5b. Original Differential Revision: https://reviews.llvm.org/D155213. Fixes: SWDEV-441187. Change-Id: I03794d52d990988f9a2f16b1639bf93521ec0dd8
1 parent a5e2e4f commit e6e954c

File tree

5 files changed

+37
-7
lines changed

5 files changed

+37
-7
lines changed

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -401,13 +401,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
401401
if (FD)
402402
setFunctionDeclAttributes(FD, F, M);
403403

404-
const bool IsHIPKernel =
405-
M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
406-
407-
// TODO: This should be moved to language specific attributes instead.
408-
if (IsHIPKernel)
409-
F->addFnAttr("uniform-work-group-size", "true");
410-
411404
if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
412405
F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
413406

clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,18 @@
1010
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
1111
// RUN: -verify -o - -x hip %s | FileCheck -check-prefix=NAMD %s
1212

13+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -foffload-uniform-block \
14+
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
15+
// RUN: | FileCheck -check-prefixes=CHECK,DEFAULT %s
16+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fno-offload-uniform-block \
17+
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
18+
// RUN: | FileCheck -check-prefixes=NOUB %s
19+
1320
#include "Inputs/cuda.h"
1421

1522
__global__ void flat_work_group_size_default() {
1623
// CHECK: define{{.*}} amdgpu_kernel void @_Z28flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT:#[0-9]+]]
24+
// NOUB: define{{.*}} void @_Z28flat_work_group_size_defaultv() [[NOUB:#[0-9]+]]
1725
}
1826

1927
__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
@@ -45,3 +53,5 @@ __global__ void num_vgpr_64() {
4553
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"
4654
// CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32"
4755
// CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}}"amdgpu-num-vgpr"="64"
56+
57+
// NOUB-NOT: "uniform-work-group-size"="true"

clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
22
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
33
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
4+
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -foffload-uniform-block -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
45

56
kernel void ker() {};
67
// CHECK: define{{.*}}@ker() #0

clang/test/Driver/hip-options.hip

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,3 +169,27 @@
169169
// RUN: %clang -### -nogpuinc -nogpulib -fhip-fp32-correctly-rounded-divide-sqrt \
170170
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefixes=CRDS %s
171171
// CRDS-NOT: "-f{{(no-)?}}hip-fp32-correctly-rounded-divide-sqrt"
172+
// Check that an explicit -f[no-]offload-uniform-block is passed to clang -cc1, but
173+
// that neither form is passed when the option is not specified (default).
174+
175+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
176+
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOUNIBLK %s
177+
178+
// NOUNIBLK: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fno-offload-uniform-block"
179+
// NOUNIBLK: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fno-offload-uniform-block"
180+
181+
// RUN: %clang -### -nogpuinc -nogpulib -foffload-uniform-block \
182+
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=UNIBLK %s
183+
184+
// UNIBLK: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-foffload-uniform-block"
185+
// UNIBLK: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-foffload-uniform-block"
186+
187+
// RUN: %clang -### -nogpuinc -nogpulib \
188+
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEFUNIBLK %s
189+
190+
// DEFUNIBLK-NOT: "-f{{(no-)?}}offload-uniform-block"
191+
192+
// Check no warnings for -f[no-]offload-uniform-block.
193+
194+
// RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
195+
// RUN: -foffload-uniform-block --cuda-gpu-arch=gfx906 %s 2>&1 | count 0

clang/test/Driver/opencl.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
// RUN: %clang -S -### -cl-denorms-are-zero %s 2>&1 | FileCheck --check-prefix=CHECK-DENORMS-ARE-ZERO %s
1818
// RUN: %clang -S -### -cl-fp32-correctly-rounded-divide-sqrt %s 2>&1 | FileCheck --check-prefix=CHECK-ROUND-DIV %s
1919
// RUN: %clang -S -### -cl-uniform-work-group-size %s 2>&1 | FileCheck --check-prefix=CHECK-UNIFORM-WG %s
20+
// RUN: %clang -S -### -foffload-uniform-block %s 2>&1 | FileCheck --check-prefix=CHECK-UNIFORM-WG %s
21+
// RUN: %clang -S -### -fno-offload-uniform-block -cl-uniform-work-group-size %s 2>&1 | FileCheck --check-prefix=CHECK-UNIFORM-WG %s
2022
// RUN: not %clang -cl-std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s
2123
// RUN: not %clang -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s
2224
// RUN: %clang -S -### -target spir-unknown-unknown %s 2>&1 | FileCheck --check-prefix=CHECK-W-SPIR-COMPAT %s

0 commit comments

Comments
 (0)