Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
1cbe971
[SYCL] Add max work-group size kernel properties
frasercrmck Jul 3, 2024
b4d3bf1
feedback: total -> linear
frasercrmck Jul 10, 2024
a9b43f2
Update sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_proper…
frasercrmck Jul 11, 2024
cea3495
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Jul 15, 2024
97cfa72
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Jul 16, 2024
f0ab74c
feedback: maybe_unused; delete comment; update spec for exception wor…
frasercrmck Jul 16, 2024
113db50
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Jul 18, 2024
d6d2892
update llvm-spirv
frasercrmck Jul 18, 2024
fb88877
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 5, 2024
6ee833c
update tests
frasercrmck Aug 5, 2024
7722aac
emit to program metadata; add tests
frasercrmck Aug 6, 2024
e970e1e
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 7, 2024
1c60be0
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 14, 2024
7a08488
test different backends
frasercrmck Aug 14, 2024
ada9cb8
fix formatting
frasercrmck Aug 14, 2024
e4e9272
Revert "test different backends"
frasercrmck Aug 20, 2024
d427d12
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 20, 2024
ea9c293
update tests
frasercrmck Aug 20, 2024
5a2f3a6
add sycl runtime checking
frasercrmck Aug 20, 2024
63a776b
fix ur link
frasercrmck Aug 20, 2024
7c5ed9f
workaround unsupported
frasercrmck Aug 21, 2024
2d2ce2b
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 21, 2024
91d632f
bump metadata size; bump UR
frasercrmck Aug 22, 2024
97e6cc1
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 22, 2024
cadc81b
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 22, 2024
079948b
bump
frasercrmck Aug 22, 2024
275c9ae
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 27, 2024
1594d8c
update docs
frasercrmck Aug 27, 2024
bb55883
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Sep 24, 2024
4a40d12
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Sep 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions clang/lib/CodeGen/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,29 @@ static bool supportsGridConstant(CudaArch Arch) {
return Arch >= CudaArch::SM_70;
}

static llvm::SmallVector<std::optional<int64_t>, 3>
decomposeSYCLWGAttr(const llvm::Attribute &Attr) {
// Split up values in the comma-separated list of integers.
SmallVector<StringRef, 3> ValStrs;
Attr.getValueAsString().split(ValStrs, ',');
assert(ValStrs.size() <= 3 && "Must have at most three dimensions for "
"SYCL work-group property");

llvm::SmallVector<std::optional<int64_t>, 3> Ops;
// Index-flip the values; SYCL specifies fastest-moving dimensions
// right-to-left: NVPTX is left-to-right.
for (auto ValStr : reverse(ValStrs)) {
size_t Value = 0;
[[maybe_unused]] bool Error = ValStr.getAsInteger(10, Value);
assert(!Error && "The attribute's value is not a number");
Ops.push_back(Value);
}
// Pad out any missing elements
Ops.append(3 - std::max(Ops.size(), size_t{3}), std::nullopt);

return Ops;
}

void NVPTXTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (GV->isDeclaration())
Expand Down Expand Up @@ -301,6 +324,33 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
addNVVMMetadata(F, "maxntidx", MWGS->getZDimVal());
addNVVMMetadata(F, "maxntidy", MWGS->getYDimVal());
addNVVMMetadata(F, "maxntidz", MWGS->getXDimVal());
} else if (auto Attr = F->getFnAttribute("sycl-max-work-group-size");
Attr.isValid()) {
auto Ops = decomposeSYCLWGAttr(Attr);

// Work-group sizes (in NVVM annotations) must be positive and less than
// INT32_MAX, whereas SYCL can allow for larger work-group sizes (see
// -fno-sycl-id-queries-fit-in-int). If any dimension is too large for
// NVPTX, don't emit any annotation at all.
if (llvm::all_of(Ops, [](std::optional<int64_t> V) {
return !V || llvm::isUInt<31>(*V);
})) {
static constexpr const char *Annots[] = {"maxntidx", "maxntidy",
"maxntidz"};
for (auto [AnnotStr, Val] : zip(Annots, Ops))
if (Val.has_value())
addNVVMMetadata(F, AnnotStr, *Val);
}
}

if (auto Attr = F->getFnAttribute("sycl-max-linear-work-group-size");
Attr.isValid()) {
size_t Value = 0;
bool Error = Attr.getValueAsString().getAsInteger(10, Value);
assert(!Error && "The attribute's value is not a number");
if (llvm::isUInt<31>(Value)) {
addNVVMMetadata(F, "maxntidx", Value);
}
}

auto attrValue = [&](Expr *E) {
Expand Down
29 changes: 16 additions & 13 deletions llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,18 +361,24 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) {
AddFPControlMetadataForWidth(SPIRV_DENORM_PRESERVE, 64);
}

if (AttrKindStr == "sycl-work-group-size" ||
AttrKindStr == "sycl-work-group-size-hint") {
static constexpr std::tuple<const char *, const char *> SimpleWGAttrs[] = {
{"sycl-work-group-size", "reqd_work_group_size"},
{"sycl-work-group-size-hint", "work_group_size_hint"},
{"sycl-max-work-group-size", "max_work_group_size"},
};

for (auto &[AttrKind, MDStr] : SimpleWGAttrs) {
if (AttrKindStr != AttrKind)
continue;
// Split values in the comma-separated list integers.
SmallVector<StringRef, 3> ValStrs;
Attr.getValueAsString().split(ValStrs, ',');
SmallVector<StringRef, 3> AttrValStrs;
Attr.getValueAsString().split(AttrValStrs, ',');

assert(ValStrs.size() <= 3 &&
"sycl-work-group-size and sycl-work-group-size-hint currently only "
"support up to three values");
assert(AttrValStrs.size() <= 3 &&
"Incorrect number of values for kernel property");

// SYCL work-group sizes must be reversed for SPIR-V.
std::reverse(ValStrs.begin(), ValStrs.end());
std::reverse(AttrValStrs.begin(), AttrValStrs.end());

// Use integer pointer size as closest analogue to size_t.
IntegerType *IntPtrTy = DLayout.getIntPtrType(Ctx);
Expand All @@ -381,14 +387,11 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) {

// Get the integers from the strings.
SmallVector<Metadata *, 3> MDVals;
for (StringRef ValStr : ValStrs)
for (StringRef ValStr : AttrValStrs)
MDVals.push_back(ConstantAsMetadata::get(
Constant::getIntegerValue(SizeTTy, APInt(SizeTBitSize, ValStr, 10))));

const char *MDName = (AttrKindStr == "sycl-work-group-size")
? "reqd_work_group_size"
: "work_group_size_hint";
return std::pair<std::string, MDNode *>(MDName, MDNode::get(Ctx, MDVals));
return std::pair<std::string, MDNode *>(MDStr, MDNode::get(Ctx, MDVals));
}

if (AttrKindStr == "sycl-sub-group-size") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ Jessica Davies, Intel +
Joe Garvey, Intel +
Greg Lueck, Intel +
John Pennycook, Intel +
Roland Schulz, Intel
Roland Schulz, Intel +
Fraser Cormack, Codeplay

== Overview

Expand Down Expand Up @@ -116,10 +117,14 @@ supports.

=== Kernel Properties

The kernel properties below correspond to kernel attributes defined in
Most of the kernel properties below correspond to kernel attributes defined in
Section 5.8.1 of the SYCL 2020 specification. Note that deprecated attributes
(such as `vec_type_hint`) are not included.

The `max_work_group_size` and `max_linear_work_group_size` kernel properties
are also provided as complements to other properties concerning work-group
sizes, without a corresponding function attribute form.

```c++
namespace sycl {
namespace ext {
Expand All @@ -138,6 +143,17 @@ struct work_group_size_hint_key {
using value_t = property_value<work_group_size_hint_key, std::integral_constant<size_t, Dims>...>;
}; // work_group_size_hint_key

// Corresponds to max_work_group_size
struct max_work_group_size_key {
template <size_t... Dims>
using value_t = property_value<max_work_group_size_key, std::integral_constant<size_t, Dims>...>;
}; // max_work_group_size_key

struct max_linear_work_group_size_key {
template <size_t Size>
using value_t = property_value<max_linear_work_group_size_key, std::integral_constant<size_t, Size>>;
}; // max_linear_work_group_size_key

// Corresponds to reqd_sub_group_size
struct sub_group_size_key {
template <uint32_t Size>
Expand Down Expand Up @@ -174,6 +190,12 @@ inline constexpr work_group_size_key::value_t<Dims...> work_group_size;
template <size_t... Dims>
inline constexpr work_group_size_hint_key::value_t<Dims...> work_group_size_hint;

template <size_t... Dims>
inline constexpr max_work_group_size_key::value_t<Dims...> max_work_group_size;

template <size_t Size>
inline constexpr max_linear_work_group_size_key::value_t<Size> max_linear_work_group_size;

template <uint32_t Size>
inline constexpr sub_group_size_key::value_t<Size> sub_group_size;

Expand All @@ -182,6 +204,8 @@ inline constexpr device_has_key::value_t<Aspects...> device_has;

template <> struct is_property_key<work_group_size_key> : std::true_type {};
template <> struct is_property_key<work_group_size_hint_key> : std::true_type {};
template <> struct is_property_key<max_work_group_size_key> : std::true_type {};
template <> struct is_property_key<max_linear_work_group_size_key> : std::true_type {};
template <> struct is_property_key<sub_group_size_key> : std::true_type {};
template <> struct is_property_key<device_has_key> : std::true_type {};

Expand Down Expand Up @@ -211,6 +235,18 @@ template <> struct is_property_key<device_has_key> : std::true_type {};
of the work-group used to invoke the kernel. The order of the template
arguments matches the constructor of the `range` class.

|`max_work_group_size`
|The `max_work_group_size` property provides a promise to the compiler
that the kernel will never be launched with a larger work-group than the
specified size. The number of template arguments in the `Dims` parameter pack
must match the dimensionality of the work-group used to invoke the kernel. The
order of the template arguments matches the constructor of the `range` class.

|`max_linear_work_group_size`
|The `max_linear_work_group_size` property provides a promise to the compiler
that the kernel will never be launched with a work-group for which the return
value of `group::get_local_linear_range()` exceeds the specified amount.

|`sub_group_size`
|The `sub_group_size` property adds the requirement that the kernel must be
compiled and executed with the specified sub-group size. An implementation may
Expand Down
55 changes: 55 additions & 0 deletions sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,21 @@ struct single_task_kernel_key {
using value_t = property_value<single_task_kernel_key>;
};

struct max_work_group_size_key
: detail::compile_time_property_key<detail::PropKind::MaxWorkGroupSize> {
template <size_t... Dims>
using value_t = property_value<max_work_group_size_key,
std::integral_constant<size_t, Dims>...>;
};

struct max_linear_work_group_size_key
: detail::compile_time_property_key<
detail::PropKind::MaxLinearWorkGroupSize> {
template <size_t Size>
using value_t = property_value<max_linear_work_group_size_key,
std::integral_constant<size_t, Size>>;
};

template <size_t Dim0, size_t... Dims>
struct property_value<work_group_size_key, std::integral_constant<size_t, Dim0>,
std::integral_constant<size_t, Dims>...> {
Expand Down Expand Up @@ -138,6 +153,28 @@ template <> struct property_value<single_task_kernel_key> {
using key_t = single_task_kernel_key;
};

template <size_t Dim0, size_t... Dims>
struct property_value<max_work_group_size_key,
std::integral_constant<size_t, Dim0>,
std::integral_constant<size_t, Dims>...> {
static_assert(sizeof...(Dims) + 1 <= 3,
"max_work_group_size property currently "
"only supports up to three values.");
static_assert(
detail::AllNonZero<Dim0, Dims...>::value,
"max_work_group_size property must only contain non-zero values.");

using key_t = max_work_group_size_key;

constexpr size_t operator[](int Dim) const {
return std::array<size_t, sizeof...(Dims) + 1>{Dim0, Dims...}[Dim];
}
};

template <> struct property_value<max_linear_work_group_size_key> {
using key_t = max_linear_work_group_size_key;
};

template <size_t Dim0, size_t... Dims>
inline constexpr work_group_size_key::value_t<Dim0, Dims...> work_group_size;

Expand All @@ -156,6 +193,14 @@ inline constexpr nd_range_kernel_key::value_t<Dims> nd_range_kernel;

inline constexpr single_task_kernel_key::value_t single_task_kernel;

template <size_t Dim0, size_t... Dims>
inline constexpr max_work_group_size_key::value_t<Dim0, Dims...>
max_work_group_size;

template <size_t Size>
inline constexpr max_linear_work_group_size_key::value_t<Size>
max_linear_work_group_size;

struct work_group_progress_key
: detail::compile_time_property_key<detail::PropKind::WorkGroupProgress> {
template <forward_progress_guarantee Guarantee,
Expand Down Expand Up @@ -270,6 +315,16 @@ template <> struct PropertyMetaInfo<single_task_kernel_key::value_t> {
static constexpr const char *name = "sycl-single-task-kernel";
static constexpr int value = 0;
};
template <size_t Dim0, size_t... Dims>
struct PropertyMetaInfo<max_work_group_size_key::value_t<Dim0, Dims...>> {
static constexpr const char *name = "sycl-max-work-group-size";
static constexpr const char *value = SizeListToStr<Dim0, Dims...>::value;
};
template <size_t Size>
struct PropertyMetaInfo<max_linear_work_group_size_key::value_t<Size>> {
static constexpr const char *name = "sycl-max-linear-work-group-size";
static constexpr size_t value = Size;
};

template <typename T, typename = void>
struct HasKernelPropertiesGetMethod : std::false_type {};
Expand Down
4 changes: 3 additions & 1 deletion sycl/include/sycl/ext/oneapi/properties/property.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,10 @@ enum PropKind : uint32_t {
IncludeFiles = 71,
RegisteredKernelNames = 72,
ClusterLaunch = 73,
MaxWorkGroupSize = 74,
MaxLinearWorkGroupSize = 75,
// PropKindSize must always be the last value.
PropKindSize = 74,
PropKindSize = 76,
};

struct property_key_base_tag {};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR
// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s
// expected-no-diagnostics

#include <sycl/sycl.hpp>

int main() {
sycl::queue Q;

constexpr auto Props = sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::max_linear_work_group_size<4>,
};
// CHECK-IR: spir_kernel void @{{.*}}LaunchBoundsKernel(){{.*}} #[[LaunchBoundsAttrs:[0-9]+]]
Q.single_task<class LaunchBoundsKernel>(Props, []() {});

return 0;
}

// CHECK-IR: attributes #[[LaunchBoundsAttrs]] = {
// CHECK-IR-SAME: "sycl-max-linear-work-group-size"="4"
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR

#include <sycl/sycl.hpp>

int main() {
sycl::queue Q;

constexpr auto Props = sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::max_linear_work_group_size<4>,
};

// CHECK-IR: define{{.*}}void @[[LaunchBoundsKernelFn:.*LaunchBoundsKernel0]](){{.*}} #[[LaunchBoundsAttrs:[0-9]+]]
Q.single_task<class LaunchBoundsKernel0>(Props, []() {});

return 0;
}

// CHECK-IR: attributes #[[LaunchBoundsAttrs]] = {
// CHECK-IR-SAME: "sycl-max-linear-work-group-size"="4"

// CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"kernel", i32 1}
// CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"maxntidx", i32 4}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR
// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s
// expected-no-diagnostics

#include <sycl/sycl.hpp>

int main() {
sycl::queue Q;
sycl::event Ev;

constexpr auto Props1 = sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::max_work_group_size<8>};
constexpr auto Props2 = sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::max_work_group_size<8, 4>};
constexpr auto Props3 = sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::max_work_group_size<8, 4, 2>};

// CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel0(){{.*}} #[[MaxWGSizeAttr0:[0-9]+]]
// CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD0:[0-9]+]]
Q.single_task<class MaxWGSizeKernel0>(Props1, []() {});
// CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel1(){{.*}} #[[MaxWGSizeAttr1:[0-9]+]]
// CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD1:[0-9]+]]
Q.single_task<class MaxWGSizeKernel1>(Ev, Props2, []() {});
// CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel2(){{.*}} #[[MaxWGSizeAttr2:[0-9]+]]
// CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD2:[0-9]+]]
Q.single_task<class MaxWGSizeKernel2>({Ev}, Props3, []() {});

return 0;
}

// CHECK-IR: attributes #[[MaxWGSizeAttr0]] = { {{.*}}"sycl-max-work-group-size"="8"
// CHECK-IR: attributes #[[MaxWGSizeAttr1]] = { {{.*}}"sycl-max-work-group-size"="8,4"
// CHECK-IR: attributes #[[MaxWGSizeAttr2]] = { {{.*}}"sycl-max-work-group-size"="8,4,2"

// CHECK-IR: ![[MaxWGSizeMD0]] = !{i64 8}
// CHECK-IR: ![[MaxWGSizeMD1]] = !{i64 4, i64 8}
// CHECK-IR: ![[MaxWGSizeMD2]] = !{i64 2, i64 4, i64 8}
Loading