Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1007,7 +1007,8 @@ def b : JoinedOrSeparate<["-"], "b">, Flags<[LinkerInput]>,
Group<Link_Group>;

defm offload_uniform_block : BoolFOption<"offload-uniform-block",
LangOpts<"OffloadUniformBlock">, Default<"LangOpts->CUDA">,
LangOpts<"OffloadUniformBlock">,
Default<"LangOpts->CUDA || (LangOpts->OpenCL && LangOpts->OpenCLVersion <= 120)">,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "Assume">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Don't assume">,
BothFlags<[], [ClangOption], " that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)">>;
Expand Down
27 changes: 12 additions & 15 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2431,21 +2431,18 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
NumElemsParam);
}

if (TargetDecl->hasAttr<OpenCLKernelAttr>()) {
if (getLangOpts().OpenCLVersion <= 120) {
// OpenCL v1.2 Work groups are always uniform
FuncAttrs.addAttribute("uniform-work-group-size", "true");
} else {
// OpenCL v2.0 Work groups may be either uniform or not.
// '-cl-uniform-work-group-size' compile option gets a hint
// to the compiler that the global work-size be a multiple of
// the work-group size specified to clEnqueueNDRangeKernel
// (i.e. work groups are uniform).
FuncAttrs.addAttribute(
"uniform-work-group-size",
llvm::toStringRef(getLangOpts().OffloadUniformBlock));
}
}
// OpenCL v1.2 work groups are always uniform.
// OpenCL v2.0 work groups may be either uniform or not.
// The '-cl-uniform-work-group-size' compile option gives a hint
// to the compiler that the global work-size is a multiple of
// the work-group size specified to clEnqueueNDRangeKernel
// (i.e. work groups are uniform).
// OffloadUniformBlock defaults to true for OpenCL v1.2 and earlier and
// false for OpenCL 2.0 and later; its value is overridden by a compilation option.
if (TargetDecl->hasAttr<OpenCLKernelAttr>())
FuncAttrs.addAttribute(
"uniform-work-group-size",
llvm::toStringRef(getLangOpts().OffloadUniformBlock));

if (TargetDecl->hasAttr<CUDAGlobalAttr>() &&
getLangOpts().OffloadUniformBlock)
Expand Down
1 change: 1 addition & 0 deletions clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -fno-offload-uniform-block -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -foffload-uniform-block -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
Expand Down