diff --git a/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp b/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp deleted file mode 100644 index 148389bce5354..0000000000000 --- a/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// REQUIRES: level_zero || cuda - -// UNSUPPORTED: windows && gpu-intel-gen12 -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/21556 - -// RUN: %{build} -o %t2.out -// RUN: env SYCL_UR_TRACE=2 UR_L0_DEBUG=1 %{run} %t2.out %if level_zero %{ 2>&1 | FileCheck %s %} -// RUN: %{run} %t2.out - -#include - -#include -#include - -using namespace sycl; - -int main() { - { - int data1[10] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; - { - buffer a(data1, range<1>(10), {property::buffer::use_host_ptr()}); - buffer b( - range<1>(10), - {ext::oneapi::property::buffer::use_pinned_host_memory()}); - queue myQueue; - myQueue.submit([&](handler &cgh) { - auto A = a.get_access(cgh); - auto B = b.get_access(cgh); - cgh.parallel_for(range<1>{10}, [=](id<1> index) { - B[index] = 0; - A[index] = B[index] + 1; - }); - }); - } // Data is copied back because there is a user side shared_ptr - for (int i = 0; i < 10; i++) - assert(data1[i] == 1); - } -} - -// CHECK: <--- urMemBufferCreate -// CHECK: <--- urMemBufferCreate -// CHECK-SAME: UR_MEM_FLAG_ALLOC_HOST_POINTER diff --git a/sycl/test-e2e/Basic/buffer/native_buffer_creation_flags.cpp b/sycl/test-e2e/Basic/buffer/native_buffer_creation_flags.cpp deleted file mode 100644 index 2fa49525a738b..0000000000000 --- a/sycl/test-e2e/Basic/buffer/native_buffer_creation_flags.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// REQUIRES: cpu -// RUN: %{build} -o %t.out -// RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s - -#include - -class Foo; -using namespace sycl; -int main() { - const int BufVal = 42; - buffer Buf{&BufVal, range<1>(1)}; - queue Q; - - { - // This should trigger memory allocation on host since the pointer passed by - // the user is read-only. - host_accessor BufAcc(Buf, write_only); - } - - Q.submit([&](handler &Cgh) { - // Now that we have a read-write host allocation, check that the native - // buffer is created with the UR_MEM_FLAG_USE_HOST_POINTER flag. - // CHECK: <--- urMemBufferCreate - // CHECK-SAME: UR_MEM_FLAG_USE_HOST_POINTER - auto BufAcc = Buf.get_access(Cgh); - Cgh.single_task([=]() { int A = BufAcc[0]; }); - }); -} diff --git a/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp b/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp deleted file mode 100644 index aa81fed1c19d3..0000000000000 --- a/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out -// RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s - -#include - -int main() { - sycl::queue q; - sycl::buffer b{1024}; - sycl::id<1> start_offset{64}; - size_t size = 16; - sycl::buffer sub1{b, start_offset, sycl::range<1>{size}}; - sycl::buffer sub2{b, start_offset, sycl::range<1>{size * 2}}; - - int idx = 0; - for (auto &e : sycl::host_accessor{b}) - e = idx++ % size; - - // CHECK: <--- urMemBufferPartition - // CHECK: .origin = 256, .size = 64 - q.submit([&](sycl::handler &cgh) { - sycl::accessor acc{sub1, cgh}; - cgh.parallel_for(size, [=](auto id) { acc[id] += 1; }); - }); - // CHECK: <--- urMemBufferPartition - // CHECK: .origin = 256, .size = 128 - q.submit([&](sycl::handler &cgh) { - sycl::accessor acc{sub2, cgh}; - cgh.parallel_for(size * 2, [=](auto id) { acc[id] -= 1; }); - }); - - // Print before asserts to ensure stream is flushed. - for (auto &e : sycl::host_accessor{sub2}) - std::cout << e << " "; - std::cout << std::endl; - - idx = 0; - for (auto &e : sycl::host_accessor{sub2}) { - assert(e == idx % size - idx / size); - ++idx; - } - - return 0; -} diff --git a/sycl/test-e2e/Basic/fill_accessor_ur.cpp b/sycl/test-e2e/Basic/fill_accessor_ur.cpp deleted file mode 100644 index 6643845de4c3d..0000000000000 --- a/sycl/test-e2e/Basic/fill_accessor_ur.cpp +++ /dev/null @@ -1,147 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s - -// This test merely checks the use of the correct UR call. Its sister test -// fill_accessor.cpp thoroughly checks the workings of the .fill() call. - -#include -constexpr int width = 32; -constexpr int height = 16; -constexpr int depth = 8; -constexpr int total_2D = width * height; -constexpr int total_3D = width * height * depth; - -void testFill_Buffer1D() { - std::vector data_1D(width, 0); - { - sycl::buffer buffer_1D(data_1D.data(), sycl::range<1>(width)); - - sycl::queue q; - std::cout << "start testFill_Buffer1D" << std::endl; - q.submit([&](sycl::handler &cgh) { - auto acc1D = buffer_1D.get_access(cgh); - // should stage urEnqueueMemBufferFill - cgh.fill(acc1D, float{1}); - }); - q.wait(); - - std::cout << "start testFill_Buffer1D -- OFFSET" << std::endl; - q.submit([&](sycl::handler &cgh) { - auto acc1DOffset = - buffer_1D.get_access(cgh, {4}, {2}); - // despite being offset, should stage urEnqueueMemBufferFill - cgh.fill(acc1DOffset, float{2}); - }); - q.wait(); - } // ~buffer - - // quick check. fill_accessor.cpp is more thorough. - assert(data_1D[1] == 1); - assert(data_1D[2] == 2); -} - -void testFill_Buffer2D() { - std::vector data_2D(total_2D, 0); - { - sycl::buffer buffer_2D(data_2D.data(), - sycl::range<2>(height, width)); - - sycl::queue q; - std::cout << "start testFill_Buffer2D" << std::endl; - q.submit([&](sycl::handler &cgh) { - auto acc2D = buffer_2D.get_access(cgh); - // should stage urEnqueueMemBufferFill - cgh.fill(acc2D, float{3}); - }); - q.wait(); - - std::cout << "start testFill_Buffer2D -- OFFSET" << std::endl; - q.submit([&](sycl::handler &cgh) { - auto acc2D = - buffer_2D.get_access(cgh, {8, 12}, {2, 2}); - // "ranged accessor" will have to be handled by custom kernel: - // urEnqueueKernelLaunchWithArgsExp - cgh.fill(acc2D, float{4}); - }); - q.wait(); - } // ~buffer - - // quick check. fill_accessor.cpp is more thorough. - assert(data_2D[(1 * width) + 1] == 3); // [1][1] sb 3 - assert(data_2D[(2 * width) + 2] == 4); // [2][2] sb 4 -} - -void testFill_Buffer3D() { - std::vector data_3D(total_3D, 0); - { - sycl::buffer buffer_3D(data_3D.data(), - sycl::range<3>(depth, height, width)); - - sycl::queue q; - std::cout << "start testFill_Buffer3D" << std::endl; - q.submit([&](sycl::handler &cgh) { - auto acc3D = buffer_3D.get_access(cgh); - // should stage urEnqueueMemBufferFill - cgh.fill(acc3D, float{5}); - }); - q.wait(); - - std::cout << "start testFill_Buffer3D -- OFFSET" << std::endl; - q.submit([&](sycl::handler &cgh) { - auto acc3D = buffer_3D.get_access( - cgh, {4, 8, 12}, {3, 3, 3}); - // "ranged accessor" will have to be handled by custom kernel: - // urEnqueueKernelLaunchWithArgsExp - cgh.fill(acc3D, float{6}); - }); - q.wait(); - } // ~buffer - - // quick check. fill_accessor.cpp is more thorough. - assert(data_3D[(1 * height * width) + (1 * width) + 1] == - 5); // [1][1][1] sb 5 - assert(data_3D[(3 * height * width) + (3 * width) + 3] == - 6); // [3][3][3] sb 6 -} - -void testFill_ZeroDim() { - sycl::range<1> r{1}; - std::vector data_0D(1, 0); - { - sycl::buffer Buffer(data_0D.data(), r); - sycl::queue q; - std::cout << "start testFill_ZeroDim" << std::endl; - q.submit([&](sycl::handler &cgh) { - sycl::accessor Acc0(Buffer, cgh); - cgh.fill(Acc0, float{1}); - }); - q.wait(); - } - assert(data_0D[0] == 1); -} - -int main() { - testFill_Buffer1D(); - testFill_Buffer2D(); - testFill_Buffer3D(); - testFill_ZeroDim(); - return 0; -} - -// CHECK: start testFill_Buffer1D -// CHECK: <--- urEnqueueMemBufferFill -// CHECK: start testFill_Buffer1D -- OFFSET -// CHECK: <--- urEnqueueMemBufferFill - -// CHECK: start testFill_Buffer2D -// CHECK: <--- urEnqueueMemBufferFill -// CHECK: start testFill_Buffer2D -- OFFSET -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp - -// CHECK: start testFill_Buffer3D -// CHECK: <--- urEnqueueMemBufferFill -// CHECK: start testFill_Buffer3D -- OFFSET -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp - -// CHECK: start testFill_ZeroDim -// CHECK: <--- urEnqueueMemBufferFill diff --git a/sycl/test-e2e/Basic/use_pinned_host_memory.cpp b/sycl/test-e2e/Basic/use_pinned_host_memory.cpp deleted file mode 100644 index cba9c575e8497..0000000000000 --- a/sycl/test-e2e/Basic/use_pinned_host_memory.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// REQUIRES: cpu -// RUN: %{build} -o %t.out -// RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s - -#include - -#include -#include - -int main() { - const sycl::range<1> N{1}; - sycl::buffer Buf( - N, {sycl::ext::oneapi::property::buffer::use_pinned_host_memory()}); - if (!Buf.has_property< - sycl::ext::oneapi::property::buffer::use_pinned_host_memory>()) { - std::cerr << "Buffer should have the use_pinned_host_memory property" - << std::endl; - return 1; - } - - sycl::queue Q; - Q.submit([&](sycl::handler &CGH) { - auto Acc = Buf.get_access(CGH); - CGH.single_task([=]() {}); - }); - - try { - int Data = 0; - sycl::buffer Buf( - &Data, N, - {sycl::ext::oneapi::property::buffer::use_pinned_host_memory()}); - // Expected that exception is thrown - return 1; - } catch (sycl::exception &E) { - if (E.code() != sycl::errc::invalid || - std::string(E.what()).find( - "The use_pinned_host_memory cannot be used with host pointer") == - std::string::npos) { - return 1; - } - - return 0; - } -} - -// CHECK: <--- urMemBufferCreate -// CHECK-SAME: UR_MEM_FLAG_ALLOC_HOST_POINTER diff --git a/sycl/test-e2e/EnqueueFunctions/barrier.cpp b/sycl/test-e2e/EnqueueFunctions/barrier.cpp deleted file mode 100644 index bdc510bb8e447..0000000000000 --- a/sycl/test-e2e/EnqueueFunctions/barrier.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s - -// Tests the enqueue free function barriers. - -#include -#include - -namespace oneapiext = sycl::ext::oneapi::experimental; - -int main() { - sycl::context Context; - sycl::queue Q1(Context, sycl::default_selector_v); - - oneapiext::single_task(Q1, []() {}); - oneapiext::single_task(Q1, []() {}); - - oneapiext::barrier(Q1); - - oneapiext::single_task(Q1, []() {}); - oneapiext::single_task(Q1, []() {}); - - oneapiext::barrier(Q1); - - sycl::queue Q2(Context, sycl::default_selector_v); - sycl::queue Q3(Context, sycl::default_selector_v); - - sycl::event Event1 = oneapiext::submit_with_event( - Q1, [&](sycl::handler &CGH) { oneapiext::single_task(CGH, []() {}); }); - - sycl::event Event2 = oneapiext::submit_with_event( - Q2, [&](sycl::handler &CGH) { oneapiext::single_task(CGH, []() {}); }); - - oneapiext::partial_barrier(Q3, {Event1, Event2}); - - oneapiext::single_task(Q3, []() {}); - - sycl::event Event3 = oneapiext::submit_with_event( - Q1, [&](sycl::handler &CGH) { oneapiext::single_task(CGH, []() {}); }); - - sycl::event Event4 = oneapiext::submit_with_event( - Q2, [&](sycl::handler &CGH) { oneapiext::single_task(CGH, []() {}); }); - - oneapiext::partial_barrier(Q3, {Event3, Event4}); - - oneapiext::single_task(Q3, []() {}); - - Q1.wait(); - - return 0; -} - -// CHECK-COUNT-4: <--- urEnqueueEventsWaitWithBarrierExt -// CHECK-NOT: <--- urEnqueueEventsWaitWithBarrierExt diff --git a/sycl/test-e2e/EnqueueFunctions/mem_advise.cpp b/sycl/test-e2e/EnqueueFunctions/mem_advise.cpp deleted file mode 100644 index c1cba59f4ff53..0000000000000 --- a/sycl/test-e2e/EnqueueFunctions/mem_advise.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// REQUIRES: aspect-usm_shared_allocations -// RUN: %{build} -o %t.out -// RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s - -// Tests the enqueue free function mem_advise. - -#include -#include -#include - -namespace oneapiext = sycl::ext::oneapi::experimental; - -constexpr size_t N = 1024; - -int main() { - sycl::context Context; - sycl::queue Q(Context, sycl::default_selector_v); - int *Memory = sycl::malloc_shared(N, Q); - - constexpr size_t ChunkSize = N / 3; - - oneapiext::mem_advise(Q, Memory, ChunkSize, 0); - - oneapiext::submit(Q, [&](sycl::handler &CGH) { - oneapiext::mem_advise(CGH, Memory + ChunkSize, ChunkSize, 0); - }); - - sycl::event E = oneapiext::submit_with_event(Q, [&](sycl::handler &CGH) { - oneapiext::mem_advise(CGH, Memory + ChunkSize * 2, ChunkSize, 0); - }); - - E.wait(); - Q.wait(); - sycl::free(Memory, Q); - - return 0; -} - -// CHECK-COUNT-3: <--- urEnqueueUSMAdvise -// CHECK-NOT: <--- urEnqueueUSMAdvise diff --git a/sycl/test-e2e/EnqueueFunctions/prefetch.cpp b/sycl/test-e2e/EnqueueFunctions/prefetch.cpp deleted file mode 100644 index 94be68fe05b67..0000000000000 --- a/sycl/test-e2e/EnqueueFunctions/prefetch.cpp +++ /dev/null @@ -1,39 +0,0 @@ -// REQUIRES: aspect-usm_shared_allocations -// RUN: %{build} -o %t.out -// RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s - -// Tests the enqueue free function prefetch. - -#include -#include -#include - -namespace oneapiext = sycl::ext::oneapi::experimental; - -constexpr size_t N = 1024; -constexpr size_t ChunkSize = N / 3; - -int main() { - sycl::context Context; - sycl::queue Q(Context, sycl::default_selector_v); - int *Memory = sycl::malloc_shared(N, Q); - - oneapiext::prefetch(Q, Memory, ChunkSize); - - oneapiext::submit(Q, [&](sycl::handler &CGH) { - oneapiext::prefetch(CGH, Memory + ChunkSize, ChunkSize); - }); - - sycl::event E = oneapiext::submit_with_event(Q, [&](sycl::handler &CGH) { - oneapiext::prefetch(CGH, Memory + ChunkSize * 2, ChunkSize); - }); - - E.wait(); - Q.wait(); - sycl::free(Memory, Q); - - return 0; -} - -// CHECK-COUNT-3: <--- urEnqueueUSMPrefetch -// CHECK-NOT: <--- urEnqueueUSMPrefetch diff --git a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp index 9e6366ce16abf..69bf0bcdf7253 100644 --- a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp +++ b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp @@ -12,6 +12,8 @@ #include #include +#include + using namespace sycl; using namespace FreeFunctionEventsHelpers; @@ -19,6 +21,40 @@ namespace oneapiext = ext::oneapi::experimental; namespace { +struct PrefetchCallRecord { + const void *Ptr; + size_t Size; +}; + +struct MemAdviseCallRecord { + const void *Ptr; + size_t Size; + ur_usm_advice_flags_t Advice; +}; + +static std::vector PrefetchCallRecords; +static std::vector MemAdviseCallRecords; +static size_t counter_urEnqueueEventsWaitWithBarrierExt = 0; + +inline ur_result_t after_urUSMEnqueuePrefetchRecord(void *pParams) { + auto Params = *static_cast(pParams); + PrefetchCallRecords.push_back({*Params.ppMem, *Params.psize}); + return UR_RESULT_SUCCESS; +} + +inline ur_result_t after_urUSMEnqueueMemAdviseRecord(void *pParams) { + auto Params = *static_cast(pParams); + MemAdviseCallRecords.push_back( + {*Params.ppMem, *Params.psize, *Params.padvice}); + return UR_RESULT_SUCCESS; +} + +inline ur_result_t after_urEnqueueEventsWaitWithBarrierExtRecord(void *pParams) { + (void)pParams; + ++counter_urEnqueueEventsWaitWithBarrierExt; + return UR_RESULT_SUCCESS; +} + class EnqueueFunctionsEventsTests : public ::testing::Test { public: EnqueueFunctionsEventsTests() @@ -33,6 +69,9 @@ class EnqueueFunctionsEventsTests : public ::testing::Test { counter_urUSMEnqueuePrefetch = 0; counter_urUSMEnqueueMemAdvise = 0; counter_urEnqueueEventsWaitWithBarrier = 0; + counter_urEnqueueEventsWaitWithBarrierExt = 0; + PrefetchCallRecords.clear(); + MemAdviseCallRecords.clear(); } unittest::UrMock<> Mock; @@ -355,6 +394,41 @@ TEST_F(EnqueueFunctionsEventsTests, PrefetchShortcutNoEvent) { free(Dst, Q); } +TEST_F(EnqueueFunctionsEventsTests, PrefetchAllFormsUseExpectedUrCalls) { + mock::getCallbacks().set_after_callback("urEnqueueUSMPrefetch", + &after_urUSMEnqueuePrefetchRecord); + + constexpr size_t N = 1024; + constexpr size_t ChunkSize = N / 3; + int *Memory = malloc_shared(N, Q); + + oneapiext::prefetch(Q, Memory, ChunkSize); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::prefetch(CGH, Memory + ChunkSize, ChunkSize); + }); + + event E = oneapiext::submit_with_event(Q, [&](handler &CGH) { + oneapiext::prefetch(CGH, Memory + ChunkSize * 2, ChunkSize); + }); + + E.wait(); + Q.wait(); + + ASSERT_EQ(PrefetchCallRecords.size(), size_t{3}); + EXPECT_EQ(PrefetchCallRecords[0].Ptr, + reinterpret_cast(Memory)); + EXPECT_EQ(PrefetchCallRecords[0].Size, ChunkSize); + EXPECT_EQ(PrefetchCallRecords[1].Ptr, + reinterpret_cast(Memory + ChunkSize)); + EXPECT_EQ(PrefetchCallRecords[1].Size, ChunkSize); + EXPECT_EQ(PrefetchCallRecords[2].Ptr, + reinterpret_cast(Memory + ChunkSize * 2)); + EXPECT_EQ(PrefetchCallRecords[2].Size, ChunkSize); + + free(Memory, Q); +} + TEST_F(EnqueueFunctionsEventsTests, SubmitMemAdviseNoEvent) { mock::getCallbacks().set_replace_callback("urEnqueueUSMAdvise", redefined_urUSMEnqueueMemAdvise); @@ -385,6 +459,95 @@ TEST_F(EnqueueFunctionsEventsTests, MemAdviseShortcutNoEvent) { free(Dst, Q); } +TEST_F(EnqueueFunctionsEventsTests, MemAdviseAllFormsUseExpectedUrCalls) { + mock::getCallbacks().set_after_callback("urEnqueueUSMAdvise", + &after_urUSMEnqueueMemAdviseRecord); + + constexpr size_t N = 1024; + constexpr size_t ChunkSize = N / 3; + int *Memory = malloc_shared(N, Q); + + oneapiext::mem_advise(Q, Memory, ChunkSize, 0); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::mem_advise(CGH, Memory + ChunkSize, ChunkSize, 0); + }); + + event E = oneapiext::submit_with_event(Q, [&](handler &CGH) { + oneapiext::mem_advise(CGH, Memory + ChunkSize * 2, ChunkSize, 0); + }); + + E.wait(); + Q.wait(); + + ASSERT_EQ(MemAdviseCallRecords.size(), size_t{3}); + EXPECT_EQ(MemAdviseCallRecords[0].Ptr, + reinterpret_cast(Memory)); + EXPECT_EQ(MemAdviseCallRecords[0].Size, ChunkSize); + EXPECT_EQ(MemAdviseCallRecords[0].Advice, ur_usm_advice_flags_t{0}); + + EXPECT_EQ(MemAdviseCallRecords[1].Ptr, + reinterpret_cast(Memory + ChunkSize)); + EXPECT_EQ(MemAdviseCallRecords[1].Size, ChunkSize); + EXPECT_EQ(MemAdviseCallRecords[1].Advice, ur_usm_advice_flags_t{0}); + + EXPECT_EQ(MemAdviseCallRecords[2].Ptr, + reinterpret_cast(Memory + ChunkSize * 2)); + EXPECT_EQ(MemAdviseCallRecords[2].Size, ChunkSize); + EXPECT_EQ(MemAdviseCallRecords[2].Advice, ur_usm_advice_flags_t{0}); + + free(Memory, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, + BarrierAndPartialBarrierUseExpectedUrCalls) { + mock::getCallbacks().set_after_callback( + "urEnqueueEventsWaitWithBarrierExt", + &after_urEnqueueEventsWaitWithBarrierExtRecord); + + context Ctx; + queue Q1(Ctx, default_selector_v); + + oneapiext::single_task(Q1, []() {}); + oneapiext::single_task(Q1, []() {}); + oneapiext::barrier(Q1); + + oneapiext::single_task(Q1, []() {}); + oneapiext::single_task(Q1, []() {}); + oneapiext::barrier(Q1); + + queue Q2(Ctx, default_selector_v); + queue Q3(Ctx, default_selector_v); + + event Event1 = oneapiext::submit_with_event(Q1, [&](handler &CGH) { + oneapiext::single_task(CGH, []() {}); + }); + + event Event2 = oneapiext::submit_with_event(Q2, [&](handler &CGH) { + oneapiext::single_task(CGH, []() {}); + }); + + oneapiext::partial_barrier(Q3, {Event1, Event2}); + oneapiext::single_task(Q3, []() {}); + + event Event3 = oneapiext::submit_with_event(Q1, [&](handler &CGH) { + oneapiext::single_task(CGH, []() {}); + }); + + event Event4 = oneapiext::submit_with_event(Q2, [&](handler &CGH) { + oneapiext::single_task(CGH, []() {}); + }); + + oneapiext::partial_barrier(Q3, {Event3, Event4}); + oneapiext::single_task(Q3, []() {}); + + Q1.wait(); + Q2.wait(); + Q3.wait(); + + ASSERT_EQ(counter_urEnqueueEventsWaitWithBarrierExt, size_t{4}); +} + TEST_F(EnqueueFunctionsEventsTests, BarrierBeforeHostTask) { // Special test for case where host_task need an event after, so a barrier is // enqueued to create a usable event. diff --git a/sycl/unittests/buffer/BufferUrApi.cpp b/sycl/unittests/buffer/BufferUrApi.cpp new file mode 100644 index 0000000000000..5588d31bd34af --- /dev/null +++ b/sycl/unittests/buffer/BufferUrApi.cpp @@ -0,0 +1,285 @@ +//==----------- BufferUrApi.cpp - check buffer-related UR calls -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include +#include + +#include +#include +#include +#include + +using namespace sycl; + +namespace { + +static size_t NumMemBufferFillCalls = 0; +static size_t NumKernelLaunchWithArgsExpCalls = 0; +static std::vector BufferPartitionRegions; +static std::vector BufferCreateFlags; + +inline ur_result_t after_urEnqueueMemBufferFill(void *pParams) { + (void)pParams; + ++NumMemBufferFillCalls; + return UR_RESULT_SUCCESS; +} + +inline ur_result_t after_urEnqueueKernelLaunchWithArgsExp(void *pParams) { + (void)pParams; + ++NumKernelLaunchWithArgsExpCalls; + return UR_RESULT_SUCCESS; +} + +inline ur_result_t after_urMemBufferPartition(void *pParams) { + auto Params = *static_cast(pParams); + BufferPartitionRegions.push_back(**Params.ppRegion); + return UR_RESULT_SUCCESS; +} + +inline ur_result_t before_urMemBufferCreate(void *pParams) { + auto Params = *static_cast(pParams); + BufferCreateFlags.push_back(*Params.pflags); + return UR_RESULT_SUCCESS; +} + +class BufferUrApiTests : public ::testing::Test { +public: + BufferUrApiTests() + : Mock{}, Q{context(sycl::platform()), default_selector_v} {} + +protected: + void SetUp() override { + NumMemBufferFillCalls = 0; + NumKernelLaunchWithArgsExpCalls = 0; + BufferPartitionRegions.clear(); + BufferCreateFlags.clear(); + } + + unittest::UrMock<> Mock; + queue Q; +}; + +TEST_F(BufferUrApiTests, FillAccessorUsesExpectedUrCommandTypes) { + mock::getCallbacks().set_after_callback("urEnqueueMemBufferFill", + &after_urEnqueueMemBufferFill); + mock::getCallbacks().set_after_callback( + "urEnqueueKernelLaunchWithArgsExp", + &after_urEnqueueKernelLaunchWithArgsExp); + + constexpr int Width = 32; + constexpr int Height = 16; + constexpr int Depth = 8; + + std::vector Data1D(Width, 0.0f); + std::vector Data2D(Width * Height, 0.0f); + std::vector Data3D(Width * Height * Depth, 0.0f); + std::vector Data0D(1, 0.0f); + + buffer Buffer1D(Data1D.data(), range<1>(Width)); + buffer Buffer2D(Data2D.data(), range<2>(Height, Width)); + buffer Buffer3D(Data3D.data(), range<3>(Depth, Height, Width)); + buffer Buffer0D(Data0D.data(), range<1>(1)); + + auto ExpectDelta = [&](size_t ExpectedFillDelta, size_t ExpectedKernelDelta, + auto &&SubmitWork) { + const size_t FillBefore = NumMemBufferFillCalls; + const size_t KernelBefore = NumKernelLaunchWithArgsExpCalls; + SubmitWork(); + EXPECT_EQ(NumMemBufferFillCalls, FillBefore + ExpectedFillDelta); + EXPECT_EQ(NumKernelLaunchWithArgsExpCalls, + KernelBefore + ExpectedKernelDelta); + }; + + // 1D full accessor -> urEnqueueMemBufferFill + ExpectDelta(1, 0, [&]() { + Q.submit([&](handler &CGH) { + auto Acc = Buffer1D.get_access(CGH); + CGH.fill(Acc, float{1}); + }).wait(); + }); + + // 1D ranged accessor -> urEnqueueMemBufferFill + ExpectDelta(1, 0, [&]() { + Q.submit([&](handler &CGH) { + auto Acc = + Buffer1D.get_access(CGH, id<1>{4}, range<1>{2}); + CGH.fill(Acc, float{2}); + }).wait(); + }); + + // 2D full accessor -> urEnqueueMemBufferFill + ExpectDelta(1, 0, [&]() { + Q.submit([&](handler &CGH) { + auto Acc = Buffer2D.get_access(CGH); + CGH.fill(Acc, float{3}); + }).wait(); + }); + + // 2D ranged accessor -> urEnqueueKernelLaunchWithArgsExp + ExpectDelta(0, 1, [&]() { + Q.submit([&](handler &CGH) { + auto Acc = Buffer2D.get_access( + CGH, id<2>{8, 12}, range<2>{2, 2}); + CGH.fill(Acc, float{4}); + }).wait(); + }); + + // 3D full accessor -> urEnqueueMemBufferFill + ExpectDelta(1, 0, [&]() { + Q.submit([&](handler &CGH) { + auto Acc = Buffer3D.get_access(CGH); + CGH.fill(Acc, float{5}); + }).wait(); + }); + + // 3D ranged accessor -> urEnqueueKernelLaunchWithArgsExp + ExpectDelta(0, 1, [&]() { + Q.submit([&](handler &CGH) { + auto Acc = Buffer3D.get_access( + CGH, id<3>{4, 8, 12}, range<3>{3, 3, 3}); + CGH.fill(Acc, float{6}); + }).wait(); + }); + + // 0D accessor -> urEnqueueMemBufferFill + ExpectDelta(1, 0, [&]() { + Q.submit([&](handler &CGH) { + accessor Acc0(Buffer0D, CGH); + CGH.fill(Acc0, float{7}); + }).wait(); + }); +} + +TEST_F(BufferUrApiTests, SubbufferOverlapUsesExpectedUrPartitionRegions) { + mock::getCallbacks().set_after_callback("urMemBufferPartition", + &after_urMemBufferPartition); + + buffer BaseBuf{1024}; + id<1> StartOffset{64}; + constexpr size_t Size = 16; + + buffer Sub1{BaseBuf, StartOffset, range<1>{Size}}; + buffer Sub2{BaseBuf, StartOffset, range<1>{Size * 2}}; + + std::array Out1{}; + std::array Out2{}; + + Q.submit([&](handler &CGH) { + auto Acc = Sub1.get_access(CGH); + CGH.copy(Acc, Out1.data()); + }).wait(); + + Q.submit([&](handler &CGH) { + auto Acc = Sub2.get_access(CGH); + CGH.copy(Acc, Out2.data()); + }).wait(); + + ASSERT_EQ(BufferPartitionRegions.size(), size_t{2}); + EXPECT_EQ(BufferPartitionRegions[0].origin, size_t{256}); + EXPECT_EQ(BufferPartitionRegions[0].size, size_t{64}); + EXPECT_EQ(BufferPartitionRegions[1].origin, size_t{256}); + EXPECT_EQ(BufferPartitionRegions[1].size, size_t{128}); +} + +TEST_F(BufferUrApiTests, NativeBufferCreationUsesHostPointerFlag) { + mock::getCallbacks().set_before_callback("urMemBufferCreate", + &before_urMemBufferCreate); + + const int BufVal = 42; + buffer Buf{&BufVal, range<1>{1}}; + + { + // This write access to a const user pointer forces creation of a + // read-write host allocation. + host_accessor BufAcc(Buf, write_only); + (void)BufAcc; + } + + BufferCreateFlags.clear(); + + int Out = 0; + Q.submit([&](handler &CGH) { + auto BufAcc = Buf.get_access(CGH); + CGH.copy(BufAcc, &Out); + }).wait(); + + ASSERT_EQ(BufferCreateFlags.size(), size_t{1}); + EXPECT_EQ(BufferCreateFlags[0] & UR_MEM_FLAG_USE_HOST_POINTER, + UR_MEM_FLAG_USE_HOST_POINTER); +} + +TEST_F(BufferUrApiTests, AllocPinnedHostMemoryUsesAllocHostPointerFlag) { + mock::getCallbacks().set_before_callback("urMemBufferCreate", + &before_urMemBufferCreate); + + int Data[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + buffer A(Data, range<1>(10), {property::buffer::use_host_ptr()}); + buffer B( + range<1>(10), + {ext::oneapi::property::buffer::use_pinned_host_memory()}); + + BufferCreateFlags.clear(); + + Q.submit([&](handler &CGH) { + auto Src = B.get_access(CGH); + auto Dst = A.get_access(CGH); + CGH.copy(Src, Dst); + }).wait(); + + ASSERT_EQ(BufferCreateFlags.size(), size_t{2}); + EXPECT_TRUE(std::any_of(BufferCreateFlags.begin(), BufferCreateFlags.end(), + [](ur_mem_flags_t Flags) { + return (Flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) != + 0; + })); +} + +TEST_F(BufferUrApiTests, UsePinnedHostMemoryUsesAllocHostPointerFlag) { + mock::getCallbacks().set_before_callback("urMemBufferCreate", + &before_urMemBufferCreate); + + range<1> N{1}; + buffer Buf( + N, {ext::oneapi::property::buffer::use_pinned_host_memory()}); + + ASSERT_TRUE( + Buf.has_property()); + + BufferCreateFlags.clear(); + + Q.submit([&](handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.fill(Acc, 7); + }).wait(); + + ASSERT_EQ(BufferCreateFlags.size(), size_t{1}); + EXPECT_EQ(BufferCreateFlags[0] & UR_MEM_FLAG_ALLOC_HOST_POINTER, + UR_MEM_FLAG_ALLOC_HOST_POINTER); +} + +TEST_F(BufferUrApiTests, UsePinnedHostMemoryWithHostPointerThrows) { + int Data = 0; + try { + buffer Buf( + &Data, range<1>{1}, + {ext::oneapi::property::buffer::use_pinned_host_memory()}); + (void)Buf; + FAIL() << "Expected exception was not thrown"; + } catch (sycl::exception &E) { + EXPECT_EQ(E.code(), sycl::errc::invalid); + EXPECT_NE(std::string(E.what()).find( + "The use_pinned_host_memory cannot be used with host " + "pointer"), + std::string::npos); + } +} + +} // namespace diff --git a/sycl/unittests/buffer/CMakeLists.txt b/sycl/unittests/buffer/CMakeLists.txt index 0e9f0300fe116..b0ae0b453cec9 100644 --- a/sycl/unittests/buffer/CMakeLists.txt +++ b/sycl/unittests/buffer/CMakeLists.txt @@ -4,5 +4,6 @@ add_sycl_unittest(BufferTests OBJECT BufferReleaseBase.cpp KernelArgMemObj.cpp SubbufferLargeSize.cpp + BufferUrApi.cpp Properties.cpp )