Skip to content

Commit 24c7dcf

Browse files
committed
Update cuda impl and test.
Signed-off-by: JackAKirk <[email protected]>
1 parent b7468c8 commit 24c7dcf

File tree

2 files changed

+85
-83
lines changed

2 files changed

+85
-83
lines changed

source/adapters/cuda/enqueue.cpp

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -487,12 +487,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
487487
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
488488
ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
489489
const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
490-
uint32_t numAttrsInLaunchAttrList,
491-
const ur_exp_launch_attribute_t *launchAttrList,
490+
uint32_t numPropsInLaunchPropList,
491+
const ur_exp_launch_property_t *launchPropList,
492492
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
493493
ur_event_handle_t *phEvent) {
494494

495-
if (numAttrsInLaunchAttrList == 0) {
495+
if (numPropsInLaunchPropList == 0) {
496496
urEnqueueKernelLaunch(hQueue, hKernel, workDim, nullptr, pGlobalWorkSize,
497497
pLocalWorkSize, numEventsInWaitList, phEventWaitList,
498498
phEvent);
@@ -504,32 +504,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
504504
UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
505505
UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
506506

507-
if (launchAttrList == NULL) {
507+
if (launchPropList == NULL) {
508508
return UR_RESULT_ERROR_INVALID_NULL_POINTER;
509509
}
510510

511-
std::vector<CUlaunchAttribute> launch_attribute(numAttrsInLaunchAttrList);
512-
for (uint32_t i = 0; i < numAttrsInLaunchAttrList; i++) {
513-
switch (launchAttrList[i].id) {
514-
case UR_EXP_LAUNCH_ATTRIBUTE_ID_IGNORE: {
511+
std::vector<CUlaunchAttribute> launch_attribute(numPropsInLaunchPropList);
512+
for (uint32_t i = 0; i < numPropsInLaunchPropList; i++) {
513+
switch (launchPropList[i].id) {
514+
case UR_EXP_LAUNCH_PROPERTY_ID_IGNORE: {
515515
launch_attribute[i].id = CU_LAUNCH_ATTRIBUTE_IGNORE;
516516
break;
517517
}
518-
case UR_EXP_LAUNCH_ATTRIBUTE_ID_CLUSTER_DIMENSION: {
518+
case UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION: {
519519

520520
launch_attribute[i].id = CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION;
521+
// Note: CUDA's x/y/z order is reversed relative to SYCL's dimension order, so SYCL dimension 2 maps to CUDA x and SYCL dimension 0 maps to CUDA z.
521522
launch_attribute[i].value.clusterDim.x =
522-
launchAttrList[i].value.clusterDim[0];
523+
launchPropList[i].value.clusterDim[2];
523524
launch_attribute[i].value.clusterDim.y =
524-
launchAttrList[i].value.clusterDim[1];
525+
launchPropList[i].value.clusterDim[1];
525526
launch_attribute[i].value.clusterDim.z =
526-
launchAttrList[i].value.clusterDim[2];
527+
launchPropList[i].value.clusterDim[0];
527528
break;
528529
}
529-
case UR_EXP_LAUNCH_ATTRIBUTE_ID_COOPERATIVE: {
530+
case UR_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE: {
530531
launch_attribute[i].id = CU_LAUNCH_ATTRIBUTE_COOPERATIVE;
531532
launch_attribute[i].value.cooperative =
532-
launchAttrList[i].value.cooperative;
533+
launchPropList[i].value.cooperative;
533534
break;
534535
}
535536
default: {
@@ -538,7 +539,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
538539
}
539540
}
540541

541-
std::vector<ur_event_handle_t> DepEvents(
542+
std::vector<ur_event_handle_t> DepEvents(
542543
phEventWaitList, phEventWaitList + numEventsInWaitList);
543544
std::vector<std::pair<ur_mem_handle_t, ur_lock>> MemMigrationLocks;
544545

@@ -587,8 +588,8 @@ std::vector<ur_event_handle_t> DepEvents(
587588
// using the standard UR_CHECK_ERROR
588589
if (ur_result_t Ret =
589590
setKernelParams(hQueue->getContext(), hQueue->Device, workDim,
590-
nullptr, pGlobalWorkSize, pLocalWorkSize,
591-
hKernel, CuFunc, ThreadsPerBlock, BlocksPerGrid);
591+
nullptr, pGlobalWorkSize, pLocalWorkSize, hKernel,
592+
CuFunc, ThreadsPerBlock, BlocksPerGrid);
592593
Ret != UR_RESULT_SUCCESS)
593594
return Ret;
594595

@@ -647,7 +648,7 @@ std::vector<ur_event_handle_t> DepEvents(
647648
launch_config.sharedMemBytes = LocalSize;
648649
launch_config.hStream = CuStream;
649650
launch_config.attrs = &launch_attribute[0];
650-
launch_config.numAttrs = numAttrsInLaunchAttrList;
651+
launch_config.numAttrs = numPropsInLaunchPropList;
651652

652653
UR_CHECK_ERROR(cuLaunchKernelEx(&launch_config, CuFunc,
653654
const_cast<void **>(ArgIndices.data()),

test/conformance/exp_launch_attributes/launch_attributes.cpp

Lines changed: 66 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -6,91 +6,92 @@
66
#include <uur/fixtures.h>
77

88
struct urEnqueueKernelLaunchCustomTest : uur::urKernelExecutionTest {
9-
void SetUp() override {
10-
program_name = "fill";
11-
UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp());
12-
}
13-
14-
uint32_t val = 42;
15-
size_t global_size = 32;
16-
size_t global_offset = 0;
17-
size_t n_dimensions = 1;
9+
void SetUp() override {
10+
program_name = "fill";
11+
UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp());
12+
}
13+
14+
uint32_t val = 42;
15+
size_t global_size = 32;
16+
size_t global_offset = 0;
17+
size_t n_dimensions = 1;
1818
};
1919
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchCustomTest);
2020

2121
TEST_P(urEnqueueKernelLaunchCustomTest, Success) {
2222

23-
size_t returned_size;
24-
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0, nullptr,
25-
&returned_size));
23+
size_t returned_size;
24+
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0,
25+
nullptr, &returned_size));
2626

27-
std::unique_ptr<char[]> returned_extensions(new char[returned_size]);
27+
std::unique_ptr<char[]> returned_extensions(new char[returned_size]);
2828

29-
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS,
30-
returned_size, returned_extensions.get(),
31-
nullptr));
29+
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS,
30+
returned_size, returned_extensions.get(),
31+
nullptr));
3232

33-
std::string_view extensions_string(returned_extensions.get());
34-
const bool launch_attributes_support =
35-
extensions_string.find(UR_LAUNCH_ATTRIBUTES_EXTENSION_STRING_EXP) !=
36-
std::string::npos;
33+
std::string_view extensions_string(returned_extensions.get());
34+
const bool launch_properties_support =
35+
extensions_string.find(UR_LAUNCH_PROPERTIES_EXTENSION_STRING_EXP) !=
36+
std::string::npos;
3737

38-
if (!launch_attributes_support) {
39-
GTEST_SKIP() << "EXP launch attributes feature is not supported.";
40-
}
38+
if (!launch_properties_support) {
39+
GTEST_SKIP() << "EXP launch properties feature is not supported.";
40+
}
4141

42-
std::vector<ur_exp_launch_attribute_t> attrs(1);
43-
attrs[0].id = UR_EXP_LAUNCH_ATTRIBUTE_ID_IGNORE;
42+
std::vector<ur_exp_launch_property_t> props(1);
43+
props[0].id = UR_EXP_LAUNCH_PROPERTY_ID_IGNORE;
4444

45-
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_PROFILE, 0, nullptr,
46-
&returned_size));
45+
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_PROFILE, 0, nullptr,
46+
&returned_size));
4747

48-
std::unique_ptr<char[]> returned_backend(new char[returned_size]);
48+
std::unique_ptr<char[]> returned_backend(new char[returned_size]);
4949

50-
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_PROFILE, returned_size,
51-
returned_backend.get(), nullptr));
50+
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_PROFILE,
51+
returned_size, returned_backend.get(),
52+
nullptr));
5253

53-
std::string_view backend_string(returned_backend.get());
54-
const bool cuda_backend = backend_string.find("CUDA") != std::string::npos;
54+
std::string_view backend_string(returned_backend.get());
55+
const bool cuda_backend = backend_string.find("CUDA") != std::string::npos;
5556

56-
if (cuda_backend) {
57-
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_VERSION, 0, nullptr,
58-
&returned_size));
57+
if (cuda_backend) {
58+
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_VERSION, 0,
59+
nullptr, &returned_size));
5960

60-
std::unique_ptr<char[]> returned_compute_capability(
61-
new char[returned_size]);
61+
std::unique_ptr<char[]> returned_compute_capability(
62+
new char[returned_size]);
6263

63-
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_VERSION,
64-
returned_size,
65-
returned_compute_capability.get(), nullptr));
64+
ASSERT_SUCCESS(
65+
urDeviceGetInfo(device, UR_DEVICE_INFO_VERSION, returned_size,
66+
returned_compute_capability.get(), nullptr));
6667

67-
auto compute_capability =
68-
std::stof(std::string(returned_compute_capability.get()));
68+
auto compute_capability =
69+
std::stof(std::string(returned_compute_capability.get()));
6970

70-
if (compute_capability >= 6.0) {
71-
ur_exp_launch_attribute_t coop_attr;
72-
coop_attr.id = UR_EXP_LAUNCH_ATTRIBUTE_ID_COOPERATIVE;
73-
coop_attr.value.cooperative = 1;
74-
attrs.push_back(coop_attr);
75-
}
71+
if (compute_capability >= 6.0) {
72+
ur_exp_launch_property_t coop_prop;
73+
coop_prop.id = UR_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE;
74+
coop_prop.value.cooperative = 1;
75+
props.push_back(coop_prop);
76+
}
7677

77-
if (compute_capability >= 9.0) {
78-
ur_exp_launch_attribute_t cluster_dims_attr;
79-
cluster_dims_attr.id = UR_EXP_LAUNCH_ATTRIBUTE_ID_CLUSTER_DIMENSION;
80-
cluster_dims_attr.value.clusterDim[0] = 32;
81-
cluster_dims_attr.value.clusterDim[1] = 1;
82-
cluster_dims_attr.value.clusterDim[2] = 1;
78+
if (compute_capability >= 9.0) {
79+
ur_exp_launch_property_t cluster_dims_prop;
80+
cluster_dims_prop.id = UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION;
81+
cluster_dims_prop.value.clusterDim[0] = 1;
82+
cluster_dims_prop.value.clusterDim[1] = 1;
83+
cluster_dims_prop.value.clusterDim[2] = 1;
8384

84-
attrs.push_back(cluster_dims_attr);
85+
props.push_back(cluster_dims_prop);
86+
}
8587
}
86-
}
87-
ur_mem_handle_t buffer = nullptr;
88-
AddBuffer1DArg(sizeof(val) * global_size, &buffer);
89-
AddPodArg(val);
90-
91-
ASSERT_SUCCESS(urEnqueueKernelLaunchCustomExp(
92-
queue, kernel, n_dimensions, &global_size, nullptr, 1, &attrs[0], 0,
93-
nullptr, nullptr));
94-
ASSERT_SUCCESS(urQueueFinish(queue));
95-
ValidateBuffer(buffer, sizeof(val) * global_size, val);
88+
ur_mem_handle_t buffer = nullptr;
89+
AddBuffer1DArg(sizeof(val) * global_size, &buffer);
90+
AddPodArg(val);
91+
92+
ASSERT_SUCCESS(urEnqueueKernelLaunchCustomExp(
93+
queue, kernel, n_dimensions, &global_size, nullptr, 1, &props[0], 0,
94+
nullptr, nullptr));
95+
ASSERT_SUCCESS(urQueueFinish(queue));
96+
ValidateBuffer(buffer, sizeof(val) * global_size, val);
9697
}

0 commit comments

Comments
 (0)