Skip to content
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
1cbe971
[SYCL] Add max work-group size kernel properties
frasercrmck Jul 3, 2024
b4d3bf1
feedback: total -> linear
frasercrmck Jul 10, 2024
a9b43f2
Update sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_proper…
frasercrmck Jul 11, 2024
cea3495
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Jul 15, 2024
97cfa72
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Jul 16, 2024
f0ab74c
feedback: maybe_unused; delete comment; update spec for exception wor…
frasercrmck Jul 16, 2024
113db50
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Jul 18, 2024
d6d2892
update llvm-spirv
frasercrmck Jul 18, 2024
fb88877
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 5, 2024
6ee833c
update tests
frasercrmck Aug 5, 2024
7722aac
emit to program metadata; add tests
frasercrmck Aug 6, 2024
e970e1e
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 7, 2024
1c60be0
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 14, 2024
7a08488
test different backends
frasercrmck Aug 14, 2024
ada9cb8
fix formatting
frasercrmck Aug 14, 2024
e4e9272
Revert "test different backends"
frasercrmck Aug 20, 2024
d427d12
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 20, 2024
ea9c293
update tests
frasercrmck Aug 20, 2024
5a2f3a6
add sycl runtime checking
frasercrmck Aug 20, 2024
63a776b
fix ur link
frasercrmck Aug 20, 2024
7c5ed9f
workaround unsupported
frasercrmck Aug 21, 2024
2d2ce2b
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 21, 2024
91d632f
bump metadata size; bump UR
frasercrmck Aug 22, 2024
97e6cc1
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 22, 2024
cadc81b
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 22, 2024
079948b
bump
frasercrmck Aug 22, 2024
275c9ae
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Aug 27, 2024
1594d8c
update docs
frasercrmck Aug 27, 2024
bb55883
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Sep 24, 2024
4a40d12
Merge remote-tracking branch 'origin/sycl' into sycl-max-wg-size-kern…
frasercrmck Sep 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions llvm-spirv/lib/SPIRV/PreprocessMetadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,16 +188,17 @@ void PreprocessMetadataBase::visit(Module *M) {
// i32 Y, i32 Z}
if (MDNode *MaxWorkgroupSizeINTEL =
Kernel.getMetadata(kSPIR2MD::MaxWGSize)) {
assert(MaxWorkgroupSizeINTEL->getNumOperands() == 3 &&
"max_work_group_size does not have 3 operands.");
assert(MaxWorkgroupSizeINTEL->getNumOperands() >= 1 &&
MaxWorkgroupSizeINTEL->getNumOperands() <= 3 &&
"max_work_group_size does not have between 1 and 3 operands.");
SmallVector<unsigned, 3> DecodedVals =
decodeMDNode(MaxWorkgroupSizeINTEL);
EM.addOp()
.add(&Kernel)
.add(spv::ExecutionModeMaxWorkgroupSizeINTEL)
.add(DecodedVals[0])
.add(DecodedVals[1])
.add(DecodedVals[2])
.add(DecodedVals.size() >= 2 ? DecodedVals[1] : 1)
.add(DecodedVals.size() == 3 ? DecodedVals[2] : 1)
.done();
}

Expand Down
45 changes: 31 additions & 14 deletions llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,19 +362,24 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) {
AddFPControlMetadataForWidth(SPIRV_DENORM_PRESERVE, 64);
}

if (AttrKindStr == "sycl-work-group-size" ||
AttrKindStr == "sycl-work-group-size-hint") {
static constexpr std::tuple<const char *, const char *> SimpleWGAttrs[] = {
{"sycl-work-group-size", "reqd_work_group_size"},
{"sycl-work-group-size-hint", "work_group_size_hint"},
{"sycl-max-work-group-size", "max_work_group_size"},
};

for (auto &[AttrKind, MDStr] : SimpleWGAttrs) {
if (AttrKindStr != AttrKind)
continue;
// Split values in the comma-separated list integers.
SmallVector<StringRef, 3> ValStrs;
Attr.getValueAsString().split(ValStrs, ',');
SmallVector<StringRef, 3> AttrValStrs;
Attr.getValueAsString().split(AttrValStrs, ',');

size_t NumDims = ValStrs.size();
assert(NumDims <= 3 &&
"sycl-work-group-size and sycl-work-group-size-hint currently only "
"support up to three values");
size_t NumDims = AttrValStrs.size();
assert(NumDims <= 3 && "Incorrect number of values for kernel property");

// SYCL work-group sizes must be reversed for SPIR-V.
std::reverse(ValStrs.begin(), ValStrs.end());
std::reverse(AttrValStrs.begin(), AttrValStrs.end());

// Use integer pointer size as closest analogue to size_t.
IntegerType *IntPtrTy = DLayout.getIntPtrType(Ctx);
Expand All @@ -383,7 +388,7 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) {

// Get the integers from the strings.
SmallVector<Metadata *, 3> MDVals;
for (StringRef ValStr : ValStrs)
for (StringRef ValStr : AttrValStrs)
MDVals.push_back(ConstantAsMetadata::get(
Constant::getIntegerValue(SizeTTy, APInt(SizeTBitSize, ValStr, 10))));
while (MDVals.size() < 3)
Expand All @@ -397,10 +402,7 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) {
Type::getInt32Ty(Ctx), NumDims))));
}

const char *MDName = (AttrKindStr == "sycl-work-group-size")
? "reqd_work_group_size"
: "work_group_size_hint";
return std::pair<std::string, MDNode *>(MDName, MDNode::get(Ctx, MDVals));
return std::pair<std::string, MDNode *>(MDStr, MDNode::get(Ctx, MDVals));
}

if (AttrKindStr == "sycl-sub-group-size") {
Expand All @@ -413,6 +415,21 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) {
MDNode::get(Ctx, MD));
}

if (AttrKindStr == "sycl-max-linear-work-group-size") {
  // Lower the single-valued property to a one-operand
  // "max_linear_work_group_size" metadata node.
  const auto MaxLinearSize = getAttributeAsInteger<uint64_t>(Attr);

  // Use integer pointer size as closest analogue to size_t.
  IntegerType *IntPtrTy = DLayout.getIntPtrType(Ctx);
  IntegerType *SizeTTy = Type::getIntNTy(Ctx, IntPtrTy->getBitWidth());
  unsigned SizeTBitSize = SizeTTy->getBitWidth();

  // The value is already an unsigned integer, so use the (numBits, uint64_t)
  // APInt constructor. A trailing `10` here would bind to that constructor's
  // `isSigned` flag rather than act as a radix; the radix argument only
  // exists on the string-taking constructor.
  Metadata *MD = ConstantAsMetadata::get(Constant::getIntegerValue(
      SizeTTy, APInt(SizeTBitSize, MaxLinearSize)));

  return std::pair<std::string, MDNode *>("max_linear_work_group_size",
                                          MDNode::get(Ctx, MD));
}

// The sycl-single-task attribute currently only has an effect when targeting
// SPIR FPGAs, in which case it will generate a "max_global_work_dim" MD node
// with a 0 value, similar to applying [[intel::max_global_work_dim(0)]] to
Expand Down
73 changes: 49 additions & 24 deletions llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,28 +112,35 @@ std::vector<StringRef> getKernelNamesUsingAssert(const Module &M) {
return SPIRKernelNames;
}

// Gets reqd_work_group_size information for function Func.
std::vector<uint32_t> getKernelReqdWorkGroupSizeMetadata(const Function &Func) {
MDNode *ReqdWorkGroupSizeMD = Func.getMetadata("reqd_work_group_size");
if (!ReqdWorkGroupSizeMD)
// Gets 1- to 3-dimension work-group related information for function Func.
// Returns an empty vector if not present.
template <typename T>
std::vector<T> getKernelWorkGroupMetadata(const Function &Func,
const char *MDName) {
MDNode *WorkGroupMD = Func.getMetadata(MDName);
if (!WorkGroupMD)
return {};
size_t NumOperands = ReqdWorkGroupSizeMD->getNumOperands();
size_t NumOperands = WorkGroupMD->getNumOperands();
assert(NumOperands >= 1 && NumOperands <= 3 &&
"reqd_work_group_size does not have between 1 and 3 operands.");
std::vector<uint32_t> OutVals;
"work-group metadata does not have between 1 and 3 operands.");
std::vector<T> OutVals;
OutVals.reserve(NumOperands);
for (const MDOperand &MDOp : ReqdWorkGroupSizeMD->operands())
for (const MDOperand &MDOp : WorkGroupMD->operands())
OutVals.push_back(mdconst::extract<ConstantInt>(MDOp)->getZExtValue());
return OutVals;
}
// Gets work_group_num_dim information for function Func, conveniently 0 if
// metadata is not present.
uint32_t getKernelWorkGroupNumDim(const Function &Func) {
MDNode *MaxDimMD = Func.getMetadata("work_group_num_dim");
if (!MaxDimMD)
return 0;
assert(MaxDimMD->getNumOperands() == 1 && "Malformed node.");
return mdconst::extract<ConstantInt>(MaxDimMD->getOperand(0))->getZExtValue();

// Gets a single-dimensional piece of information for function Func.
// Returns std::nullopt if metadata is not present.
template <typename T>
std::optional<T> getKernelSingleEltMetadata(const Function &Func,
const char *MDName) {
if (MDNode *MaxDimMD = Func.getMetadata(MDName)) {
assert(MaxDimMD->getNumOperands() == 1 && "Malformed node.");
return mdconst::extract<ConstantInt>(MaxDimMD->getOperand(0))
->getZExtValue();
}
return std::nullopt;
}

PropSetRegTy computeModuleProperties(const Module &M,
Expand Down Expand Up @@ -240,22 +247,40 @@ PropSetRegTy computeModuleProperties(const Module &M,
SmallVector<std::string, 4> MetadataNames;

if (GlobProps.EmitProgramMetadata) {
// Add reqd_work_group_size and work_group_num_dim information to
// program metadata.
// Add various pieces of function metadata to program metadata.
for (const Function &Func : M.functions()) {
std::vector<uint32_t> KernelReqdWorkGroupSize =
getKernelReqdWorkGroupSizeMetadata(Func);
if (!KernelReqdWorkGroupSize.empty()) {
// Note - we're implicitly truncating 64-bit work-group data to 32 bits in
// all work-group related metadata. All current consumers of this program
// metadata format only support SYCL ID queries that fit within MAX_INT.
if (auto KernelReqdWorkGroupSize = getKernelWorkGroupMetadata<uint32_t>(
Func, "reqd_work_group_size");
!KernelReqdWorkGroupSize.empty()) {
MetadataNames.push_back(Func.getName().str() + "@reqd_work_group_size");
PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(),
KernelReqdWorkGroupSize);
}

uint32_t WorkGroupNumDim = getKernelWorkGroupNumDim(Func);
if (WorkGroupNumDim) {
if (auto WorkGroupNumDim = getKernelSingleEltMetadata<uint32_t>(
Func, "work_group_num_dim")) {
MetadataNames.push_back(Func.getName().str() + "@work_group_num_dim");
PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(),
WorkGroupNumDim);
*WorkGroupNumDim);
}

if (auto KernelMaxWorkGroupSize =
getKernelWorkGroupMetadata<uint32_t>(Func, "max_work_group_size");
!KernelMaxWorkGroupSize.empty()) {
MetadataNames.push_back(Func.getName().str() + "@max_work_group_size");
PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(),
KernelMaxWorkGroupSize);
}

if (auto MaxLinearWGSize = getKernelSingleEltMetadata<uint64_t>(
Func, "max_linear_work_group_size")) {
MetadataNames.push_back(Func.getName().str() +
"@max_linear_work_group_size");
PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(),
*MaxLinearWGSize);
}
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/SYCLLowerIR/SYCLCreateNVVMAnnotations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ SYCLCreateNVVMAnnotationsPass::run(Module &M, ModuleAnalysisManager &MAM) {

constexpr static std::pair<const char *, const char *>
SingleValAnnotations[] = {{"min_work_groups_per_cu", "minctasm"},
{"max_work_groups_per_mp", "maxclusterrank"}};
{"max_work_groups_per_mp", "maxclusterrank"},
{"max_linear_work_group_size", "maxntidx"}};

for (auto &[MDName, AnnotationName] : SingleValAnnotations) {
if (MDNode *Node = F.getMetadata(MDName)) {
Expand Down
15 changes: 8 additions & 7 deletions sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -116,14 +116,15 @@ if(SYCL_UR_USE_FETCH_CONTENT)
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit 76361a88a0af6502fe655144655b381260ea9706
# Merge: b4195cb 0db57da
set(UNIFIED_RUNTIME_REPO "https://github.com/frasercrmck/unified-runtime.git")

# commit cabf128094eff9ff7b79bdff559640a8a111f0c3
# Merge: a96fcbc5 15bca3b6
# Author: Omar Ahmed <[email protected]>
# Date: Wed, 31 Jul 2024 14:26:38 +0100
# Merge pull request #1961 from DBDuncan/duncan/num-channels-mipmap-fix
# [Bindless][Exp] Fix urBindlessImagesImageGetInfoExp failing with mipmap images
set(UNIFIED_RUNTIME_TAG 76361a88a0af6502fe655144655b381260ea9706)
# Date: Mon Aug 19 16:20:45 2024 +0100
# Merge pull request #1984 from rafbiels/rafbiels/cuda-stream-race-cond
# Fix race condition in CUDA stream creation
set(UNIFIED_RUNTIME_TAG b595cbf4dd67f98d873f5514a22d041306a07b8f)

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ Jessica Davies, Intel +
Joe Garvey, Intel +
Greg Lueck, Intel +
John Pennycook, Intel +
Roland Schulz, Intel
Roland Schulz, Intel +
Fraser Cormack, Codeplay

== Overview

Expand Down Expand Up @@ -232,6 +233,68 @@ SYCL implementations may introduce additional kernel properties. If any
combinations of kernel attributes are invalid, this must be clearly documented
as part of the new kernel property definition.

=== Kernel Properties for the CUDA backend

The kernel properties specified in this section may only be used to decorate
kernels that are submitted to the CUDA backend. Attempting to submit a kernel
with these properties to another backend has undefined behavior.

```c++
namespace sycl {
namespace ext {
namespace oneapi {
namespace experimental {

struct max_work_group_size_key {
template <size_t... Dims>
using value_t = property_value<max_work_group_size_key, std::integral_constant<size_t, Dims>...>;
}; // max_work_group_size_key

struct max_linear_work_group_size_key {
template <size_t Size>
using value_t = property_value<max_linear_work_group_size_key, std::integral_constant<size_t, Size>>;
}; // max_linear_work_group_size_key

template <size_t... Dims>
inline constexpr max_work_group_size_key::value_t<Dims...> max_work_group_size;

template <size_t Size>
inline constexpr max_linear_work_group_size_key::value_t<Size> max_linear_work_group_size;

template <> struct is_property_key<max_work_group_size_key> : std::true_type {};
template <> struct is_property_key<max_linear_work_group_size_key> : std::true_type {};

} // namespace experimental
} // namespace oneapi
} // namespace ext
} // namespace sycl
```

|===
|Property|Description

|`max_work_group_size`
|The `max_work_group_size` property provides a promise to the compiler
that the kernel will never be launched with a larger work-group than the
specified size. The number of template arguments in the `Dims` parameter pack
must match the dimensionality of the work-group used to invoke the kernel. The
order of the template arguments matches the constructor of the `range` class.

If the kernel is submitted with an `nd_range` whose work-group size exceeds the
size specified by the property, the implementation must throw a synchronous
exception with the `errc::nd_range` error code.

|`max_linear_work_group_size`
|The `max_linear_work_group_size` property provides a promise to the compiler
that the kernel will never be launched with a work-group for which the return
value of `group::get_local_linear_range()` exceeds the specified amount.

If the kernel is submitted with an `nd_range` whose linear work-group size
exceeds the amount specified by the property, the implementation must throw a
synchronous exception with the `errc::nd_range` error code.

|===

=== Adding a Property List to a Kernel Launch

To enable properties to be associated with kernels, this extension adds
Expand Down
55 changes: 55 additions & 0 deletions sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,21 @@ struct single_task_kernel_key {
using value_t = property_value<single_task_kernel_key>;
};

// Property key for max_work_group_size, registered as a compile-time property
// of kind PropKind::MaxWorkGroupSize.
struct max_work_group_size_key
: detail::compile_time_property_key<detail::PropKind::MaxWorkGroupSize> {
// value_t<Dims...> is the property_value type for this key, carrying each
// dimension as a std::integral_constant<size_t, ...> type argument.
template <size_t... Dims>
using value_t = property_value<max_work_group_size_key,
std::integral_constant<size_t, Dims>...>;
};

// Property key for max_linear_work_group_size, registered as a compile-time
// property of kind PropKind::MaxLinearWorkGroupSize.
struct max_linear_work_group_size_key
: detail::compile_time_property_key<
detail::PropKind::MaxLinearWorkGroupSize> {
// value_t<Size> is the property_value type for this key, carrying the single
// size as a std::integral_constant<size_t, Size> type argument.
template <size_t Size>
using value_t = property_value<max_linear_work_group_size_key,
std::integral_constant<size_t, Size>>;
};

template <size_t Dim0, size_t... Dims>
struct property_value<work_group_size_key, std::integral_constant<size_t, Dim0>,
std::integral_constant<size_t, Dims>...> {
Expand Down Expand Up @@ -138,6 +153,28 @@ template <> struct property_value<single_task_kernel_key> {
using key_t = single_task_kernel_key;
};

// Value type of the max_work_group_size property for one to three dimensions.
// Rejects, at compile time, more than three values and any zero value.
template <size_t Dim0, size_t... Dims>
struct property_value<max_work_group_size_key,
                      std::integral_constant<size_t, Dim0>,
                      std::integral_constant<size_t, Dims>...> {
  using key_t = max_work_group_size_key;

  static_assert(sizeof...(Dims) + 1 <= 3,
                "max_work_group_size property currently "
                "only supports up to three values.");
  static_assert(
      detail::AllNonZero<Dim0, Dims...>::value,
      "max_work_group_size property must only contain non-zero values.");

  // Returns the size supplied for dimension Dim, in the order the template
  // arguments were given. Dim is not range-checked.
  constexpr size_t operator[](int Dim) const {
    const std::array<size_t, sizeof...(Dims) + 1> Sizes{Dim0, Dims...};
    return Sizes[Dim];
  }
};

// Full specialization of property_value for max_linear_work_group_size_key
// with no value arguments; it only exposes the key type.
// NOTE(review): max_linear_work_group_size_key::value_t<Size> names
// property_value<max_linear_work_group_size_key,
//                std::integral_constant<size_t, Size>>, which this
// zero-argument specialization does not appear to match — confirm whether it
// is ever selected or whether a partial specialization on Size was intended.
template <> struct property_value<max_linear_work_group_size_key> {
using key_t = max_linear_work_group_size_key;
};

template <size_t Dim0, size_t... Dims>
inline constexpr work_group_size_key::value_t<Dim0, Dims...> work_group_size;

Expand All @@ -156,6 +193,14 @@ inline constexpr nd_range_kernel_key::value_t<Dims> nd_range_kernel;

inline constexpr single_task_kernel_key::value_t single_task_kernel;

// Property object for max_work_group_size; takes at least one dimension
// (Dim0) followed by any further dimensions in Dims.
template <size_t Dim0, size_t... Dims>
inline constexpr max_work_group_size_key::value_t<Dim0, Dims...>
max_work_group_size;

// Property object for max_linear_work_group_size; takes a single size value.
template <size_t Size>
inline constexpr max_linear_work_group_size_key::value_t<Size>
max_linear_work_group_size;

struct work_group_progress_key
: detail::compile_time_property_key<detail::PropKind::WorkGroupProgress> {
template <forward_progress_guarantee Guarantee,
Expand Down Expand Up @@ -283,6 +328,16 @@ template <> struct PropertyMetaInfo<single_task_kernel_key::value_t> {
static constexpr const char *name = "sycl-single-task-kernel";
static constexpr int value = 0;
};
// Meta-info for max_work_group_size: `name` is the attribute name string and
// `value` is the string produced by SizeListToStr for the given dimensions
// (presumably a comma-separated list — confirm against SizeListToStr).
template <size_t Dim0, size_t... Dims>
struct PropertyMetaInfo<max_work_group_size_key::value_t<Dim0, Dims...>> {
static constexpr const char *name = "sycl-max-work-group-size";
static constexpr const char *value = SizeListToStr<Dim0, Dims...>::value;
};
// Meta-info for max_linear_work_group_size: `name` is the attribute name
// string and `value` is the size itself, kept as an integer rather than a
// string.
template <size_t Size>
struct PropertyMetaInfo<max_linear_work_group_size_key::value_t<Size>> {
static constexpr const char *name = "sycl-max-linear-work-group-size";
static constexpr size_t value = Size;
};

template <typename T, typename = void>
struct HasKernelPropertiesGetMethod : std::false_type {};
Expand Down
Loading