Skip to content

Commit 20d2a32

Browse files
committed
[OpenCL] Allow -fno-offload-uniform-block for 1.2
OpenCL 1.2 kernels assume a uniform workgroup size. By default, a function attribute is added to allow the backend to do optimizations. These optimizations may cause UB if such kernels are launched with non-uniform workgroup sizes. Although OpenCL 1.2 itself does not support launching with non-uniform workgroup sizes, OpenCL 1.2 kernels may be launched by an OpenCL 2.0 runtime or by another offloading language runtime that supports non-uniform workgroup sizes. Therefore, it is useful to allow -fno-offload-uniform-block to override the default uniform-block assumption for OpenCL 1.2.
1 parent 0fe20aa commit 20d2a32

File tree

3 files changed

+15
-16
lines changed

3 files changed

+15
-16
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,8 @@ def b : JoinedOrSeparate<["-"], "b">, Flags<[LinkerInput]>,
10071007
Group<Link_Group>;
10081008

10091009
defm offload_uniform_block : BoolFOption<"offload-uniform-block",
1010-
LangOpts<"OffloadUniformBlock">, Default<"LangOpts->CUDA">,
1010+
LangOpts<"OffloadUniformBlock">,
1011+
Default<"LangOpts->CUDA || (LangOpts->OpenCL && LangOpts->OpenCLVersion <= 120)">,
10111012
PosFlag<SetTrue, [], [ClangOption, CC1Option], "Assume">,
10121013
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Don't assume">,
10131014
BothFlags<[], [ClangOption], " that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)">>;

clang/lib/CodeGen/CGCall.cpp

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2431,21 +2431,18 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
24312431
NumElemsParam);
24322432
}
24332433

2434-
if (TargetDecl->hasAttr<OpenCLKernelAttr>()) {
2435-
if (getLangOpts().OpenCLVersion <= 120) {
2436-
// OpenCL v1.2 Work groups are always uniform
2437-
FuncAttrs.addAttribute("uniform-work-group-size", "true");
2438-
} else {
2439-
// OpenCL v2.0 Work groups may be whether uniform or not.
2440-
// '-cl-uniform-work-group-size' compile option gets a hint
2441-
// to the compiler that the global work-size be a multiple of
2442-
// the work-group size specified to clEnqueueNDRangeKernel
2443-
// (i.e. work groups are uniform).
2444-
FuncAttrs.addAttribute(
2445-
"uniform-work-group-size",
2446-
llvm::toStringRef(getLangOpts().OffloadUniformBlock));
2447-
}
2448-
}
2434+
// OpenCL v1.2 Work groups are always uniform
2435+
// OpenCL v2.0 Work groups may be either uniform or not.
2436+
// '-cl-uniform-work-group-size' compile option gets a hint
2437+
// to the compiler that the global work-size be a multiple of
2438+
// the work-group size specified to clEnqueueNDRangeKernel
2439+
// (i.e. work groups are uniform).
2440+
// OffloadUniformBlock defaults to true for OpenCL v1.2 and false
2441+
// for OpenCL 2.0, and its value can be overridden by a compilation option.
2442+
if (TargetDecl->hasAttr<OpenCLKernelAttr>())
2443+
FuncAttrs.addAttribute(
2444+
"uniform-work-group-size",
2445+
llvm::toStringRef(getLangOpts().OffloadUniformBlock));
24492446

24502447
if (TargetDecl->hasAttr<CUDAGlobalAttr>() &&
24512448
getLangOpts().OffloadUniformBlock)

clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
2+
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -fno-offload-uniform-block -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
23
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
34
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
45
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -foffload-uniform-block -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM

0 commit comments

Comments
 (0)