|
| 1 | +// Copyright (C) 2024 Intel Corporation |
| 2 | +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. |
| 3 | +// See LICENSE.TXT |
| 4 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 5 | + |
| 6 | +#include "fixtures.h" |
| 7 | +#include <chrono> |
| 8 | +#include <thread> |
| 9 | + |
| 10 | +struct QueueBufferTestWithParam : uur::IntegrationQueueTestWithParam { |
| 11 | + void SetUp() override { |
| 12 | + program_name = "cpy_and_mult"; |
| 13 | + UUR_RETURN_ON_FATAL_FAILURE( |
| 14 | + uur::IntegrationQueueTestWithParam::SetUp()); |
| 15 | + } |
| 16 | + |
| 17 | + void TearDown() override { uur::IntegrationQueueTestWithParam::TearDown(); } |
| 18 | + |
| 19 | + void verifyResults(ur_mem_handle_t Buffer, uint32_t ExpectedValue) { |
| 20 | + uint32_t HostMem[ArraySize] = {}; |
| 21 | + ASSERT_SUCCESS(urEnqueueMemBufferRead(Queue, Buffer, true, 0, |
| 22 | + sizeof(uint32_t) * ArraySize, |
| 23 | + HostMem, 0, nullptr, nullptr)); |
| 24 | + |
| 25 | + for (uint32_t i : HostMem) { |
| 26 | + ASSERT_EQ(i, ExpectedValue); |
| 27 | + } |
| 28 | + } |
| 29 | + |
| 30 | + ur_mem_handle_t Buffer1 = nullptr; |
| 31 | + ur_mem_handle_t Buffer2 = nullptr; |
| 32 | +}; |
| 33 | + |
| 34 | +UUR_TEST_SUITE_P(QueueBufferTestWithParam, |
| 35 | + testing::Values(0, /* In-Order */ |
| 36 | + UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE), |
| 37 | + uur::IntegrationQueueTestWithParam::paramPrinter); |
| 38 | + |
| 39 | +/* Submits multiple kernels that interact with each other by accessing and |
| 40 | + * writing to the same buffers. |
| 41 | + * Checks that when using an IN_ORDER queue, no synchronization is needed |
| 42 | + * between calls to urEnqueueKernelLaunch. |
| 43 | + * Checks that when using an OUT_OF_ORDER queue, synchronizing using only |
| 44 | + * event barriers is enough. */ |
| 45 | +TEST_P(QueueBufferTestWithParam, QueueBufferTest) { |
| 46 | + |
| 47 | + std::vector<ur_event_handle_t> EventsFill; |
| 48 | + ur_event_handle_t Event; |
| 49 | + |
| 50 | + size_t Buffer1Index; |
| 51 | + size_t Buffer2Index; |
| 52 | + ASSERT_NO_FATAL_FAILURE( |
| 53 | + AddBuffer1DArg(ArraySize * sizeof(uint32_t), &Buffer1, &Buffer1Index)); |
| 54 | + ASSERT_NO_FATAL_FAILURE( |
| 55 | + AddBuffer1DArg(ArraySize * sizeof(uint32_t), &Buffer2, &Buffer2Index)); |
| 56 | + |
| 57 | + ASSERT_SUCCESS(urEnqueueMemBufferFill( |
| 58 | + Queue, Buffer1, &InitialValue, sizeof(uint32_t), 0, |
| 59 | + ArraySize * sizeof(uint32_t), 0, nullptr, &Event)); |
| 60 | + EventsFill.push_back(Event); |
| 61 | + |
| 62 | + ASSERT_SUCCESS(urEnqueueMemBufferFill( |
| 63 | + Queue, Buffer2, &InitialValue, sizeof(uint32_t), 0, |
| 64 | + ArraySize * sizeof(uint32_t), 0, nullptr, &Event)); |
| 65 | + EventsFill.push_back(Event); |
| 66 | + |
| 67 | + ASSERT_NO_FATAL_FAILURE(submitBarrierIfNeeded(EventsFill)); |
| 68 | + |
| 69 | + constexpr size_t GlobalOffset = 0; |
| 70 | + constexpr size_t NDimensions = 1; |
| 71 | + constexpr uint32_t NumIterations = 5; |
| 72 | + |
| 73 | + uint32_t CurValueMem1 = InitialValue; |
| 74 | + uint32_t CurValueMem2 = InitialValue; |
| 75 | + for (uint32_t i = 0; i < NumIterations; ++i) { |
| 76 | + |
| 77 | + /* Copy from DeviceMem1 to DeviceMem2 and multiply by 2 */ |
| 78 | + ASSERT_SUCCESS( |
| 79 | + urKernelSetArgMemObj(kernel, Buffer2Index, nullptr, Buffer2)); |
| 80 | + ASSERT_SUCCESS( |
| 81 | + urKernelSetArgMemObj(kernel, Buffer1Index, nullptr, Buffer1)); |
| 82 | + |
| 83 | + ASSERT_SUCCESS(urEnqueueKernelLaunch(Queue, kernel, NDimensions, |
| 84 | + &GlobalOffset, &ArraySize, nullptr, |
| 85 | + 0, nullptr, &Event)); |
| 86 | + ASSERT_NO_FATAL_FAILURE(submitBarrierIfNeeded(Event)); |
| 87 | + |
| 88 | + CurValueMem2 = CurValueMem1 * 2; |
| 89 | + |
| 90 | + /* Copy from DeviceMem1 to DeviceMem2 and multiply by 2 */ |
| 91 | + ASSERT_SUCCESS( |
| 92 | + urKernelSetArgMemObj(kernel, Buffer1Index, nullptr, Buffer2)); |
| 93 | + ASSERT_SUCCESS( |
| 94 | + urKernelSetArgMemObj(kernel, Buffer2Index, nullptr, Buffer1)); |
| 95 | + |
| 96 | + ASSERT_SUCCESS(urEnqueueKernelLaunch(Queue, kernel, NDimensions, |
| 97 | + &GlobalOffset, &ArraySize, nullptr, |
| 98 | + 0, nullptr, &Event)); |
| 99 | + ASSERT_NO_FATAL_FAILURE(submitBarrierIfNeeded(Event)); |
| 100 | + |
| 101 | + CurValueMem1 = CurValueMem2 * 2; |
| 102 | + } |
| 103 | + |
| 104 | + ASSERT_SUCCESS(urQueueFinish(Queue)); |
| 105 | + |
| 106 | + ASSERT_NO_FATAL_FAILURE(verifyResults(Buffer1, CurValueMem1)); |
| 107 | + ASSERT_NO_FATAL_FAILURE(verifyResults(Buffer2, CurValueMem2)); |
| 108 | +} |
0 commit comments