From 41844f2202152ff05edb17d6d748da7021e8fee6 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Fri, 30 Sep 2022 02:38:41 -0400 Subject: [PATCH 01/24] Cit -m RAII guards for memory allocations and streams, define some commonly useful utility functions and kernels --- tests/catch/include/resource_guards.hh | 124 +++++++++++++++++++++++++ tests/catch/include/utils.hh | 87 +++++++++++++++++ 2 files changed, 211 insertions(+) create mode 100644 tests/catch/include/resource_guards.hh create mode 100644 tests/catch/include/utils.hh diff --git a/tests/catch/include/resource_guards.hh b/tests/catch/include/resource_guards.hh new file mode 100644 index 0000000000..293fd9d493 --- /dev/null +++ b/tests/catch/include/resource_guards.hh @@ -0,0 +1,124 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include + +enum class LinearAllocs { + malloc, + mallocAndRegister, + hipHostMalloc, + hipMalloc, + hipMallocManaged, +}; + +template class LinearAllocGuard { + public: + LinearAllocGuard(const LinearAllocs allocation_type, const size_t size, + const unsigned int flags = 0u) + : allocation_type_{allocation_type} { + switch (allocation_type_) { + case LinearAllocs::malloc: + ptr_ = host_ptr_ = reinterpret_cast(malloc(size)); + break; + case LinearAllocs::mallocAndRegister: + host_ptr_ = reinterpret_cast(malloc(size)); + HIP_CHECK(hipHostRegister(host_ptr_, size, flags)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&ptr_), host_ptr_, 0u)); + break; + case LinearAllocs::hipHostMalloc: + HIP_CHECK(hipHostMalloc(reinterpret_cast(&ptr_), size, flags)); + host_ptr_ = ptr_; + break; + case LinearAllocs::hipMalloc: + HIP_CHECK(hipMalloc(reinterpret_cast(&ptr_), size)); + break; + case LinearAllocs::hipMallocManaged: + HIP_CHECK(hipMallocManaged(reinterpret_cast(&ptr_), size, flags ? flags : 1u)); + host_ptr_ = ptr_; + } + } + + LinearAllocGuard(const LinearAllocGuard&) = delete; + LinearAllocGuard(LinearAllocGuard&&) = delete; + + ~LinearAllocGuard() { + // No Catch macros, don't want to possibly throw in the destructor + switch (allocation_type_) { + case LinearAllocs::malloc: + free(ptr_); + break; + case LinearAllocs::mallocAndRegister: + hipHostUnregister(host_ptr_); + free(host_ptr_); + break; + case LinearAllocs::hipHostMalloc: + hipHostFree(ptr_); + break; + case LinearAllocs::hipMalloc: + case LinearAllocs::hipMallocManaged: + hipFree(ptr_); + } + } + + T* ptr() { return ptr_; }; + T* const ptr() const { return ptr_; }; + T* host_ptr() { return host_ptr_; } + T* const host_ptr() const { return host_ptr(); } + + private: + const LinearAllocs allocation_type_; + T* ptr_ = nullptr; + T* host_ptr_ = nullptr; +}; + +enum class Streams { nullstream, perThread, created }; + +class StreamGuard { + public: + StreamGuard(const Streams stream_type) : stream_type_{stream_type} { + switch (stream_type_) { + case Streams::nullstream: + stream_ = nullptr; + break; + case Streams::perThread: + stream_ = hipStreamPerThread; + break; + case Streams::created: + HIP_CHECK(hipStreamCreate(&stream_)); + } + } + + StreamGuard(const StreamGuard&) = delete; + StreamGuard(StreamGuard&&) = delete; + + ~StreamGuard() { + if (stream_type_ == Streams::created) { + hipStreamDestroy(stream_); + } + } + + hipStream_t stream() const { return stream_; } + + private: + const Streams stream_type_; + hipStream_t stream_; +}; \ No newline at end of file diff --git a/tests/catch/include/utils.hh b/tests/catch/include/utils.hh new file mode 100644 index 0000000000..614159eda7 --- /dev/null +++ b/tests/catch/include/utils.hh @@ -0,0 +1,87 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include + +#include +#include + +namespace { +inline constexpr size_t kPageSize = 4096; +} // anonymous namespace + +template +void MemcpyArrayCompare(T* const expected, T* const actual, const size_t num_elements) { + const auto ret = std::mismatch(expected, expected + num_elements, actual); + if (ret.first != expected + num_elements) { + const auto idx = std::distance(expected, ret.first); + INFO("Value mismatch at index: " << idx); + REQUIRE(expected[idx] == actual[idx]); + } +} + +template +void ArrayFindIfNot(T* const array, const T expected_value, const size_t num_elements) { + const auto it = std::find_if_not(array, array + num_elements, [expected_value](const int elem) { + return expected_value == elem; + }); + + if (it != array + num_elements) { + const auto idx = std::distance(array, it); + INFO("Value mismatch at index " << idx); + REQUIRE(expected_value == array[idx]); + } +} + +template +__global__ void VectorIncrement(T* const vec, const T increment_value, size_t N) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N; i += stride) { + vec[i] += increment_value; + } +} + +template __global__ void VectorSet(T* const vec, const T value, size_t N) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N; i += stride) { + vec[i] = value; + } +} + +// Will execute for atleast interval milliseconds +static __global__ void Delay(uint32_t interval, const uint32_t ticks_per_ms) { + while (interval--) { + uint64_t start = clock(); + while (clock() - start < ticks_per_ms) { + } + } +} + +inline void LaunchDelayKernel(const std::chrono::milliseconds interval, const hipStream_t stream) { + int ticks_per_ms = 0; + // Clock rate is in kHz => number of clock ticks in a millisecond + HIP_CHECK(hipDeviceGetAttribute(&ticks_per_ms, hipDeviceAttributeClockRate, 0)); + Delay<<<1, 1, 0, stream>>>(interval.count(), ticks_per_ms); +} \ No newline at end of file From 858da0e1ae643b9c8f0347e98ad4ffacd6b5ccbe Mon Sep 17 00:00:00 2001 From: Dino Music Date: Mon, 3 Oct 2022 05:22:42 -0400 Subject: [PATCH 02/24] Implement helper function for generating allocation flags --- tests/catch/include/resource_guards.hh | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/catch/include/resource_guards.hh b/tests/catch/include/resource_guards.hh index 293fd9d493..9f50ea443a 100644 --- a/tests/catch/include/resource_guards.hh +++ b/tests/catch/include/resource_guards.hh @@ -121,4 +121,23 @@ class StreamGuard { private: const Streams stream_type_; hipStream_t stream_; -}; \ No newline at end of file +}; + +inline unsigned int GenerateLinearAllocationFlagCombinations(const LinearAllocs allocation_type) { + switch (allocation_type) { + case LinearAllocs::mallocAndRegister: + // TODO + return 0; + case LinearAllocs::hipHostMalloc: + return GENERATE(hipHostMallocDefault, hipHostMallocPortable, hipHostMallocMapped, + hipHostMallocWriteCombined); + case LinearAllocs::hipMallocManaged: + // TODO + return 1u; + case LinearAllocs::malloc: + case LinearAllocs::hipMalloc: + return 0u; + default: + assert("Invalid LinearAllocs enumerator"); + } +} \ No newline at end of file From 899b91f978a97f2cd4a082d71d8517bd74461ad9 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Tue, 4 Oct 2022 08:27:42 +0200 Subject: [PATCH 03/24] Implement helper function DeviceAttributesSupport to check if a device supports any number of attributes --- tests/catch/include/utils.hh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/catch/include/utils.hh b/tests/catch/include/utils.hh index 614159eda7..1448c4f768 100644 --- a/tests/catch/include/utils.hh +++ b/tests/catch/include/utils.hh @@ -29,7 +29,7 @@ inline constexpr size_t kPageSize = 4096; } // anonymous namespace template -void MemcpyArrayCompare(T* const expected, T* const actual, const size_t num_elements) { +void ArrayMismatch(T* const expected, T* const actual, const size_t num_elements) { const auto ret = std::mismatch(expected, expected + num_elements, actual); if (ret.first != expected + num_elements) { const auto idx = std::distance(expected, ret.first); @@ -84,4 +84,15 @@ inline void LaunchDelayKernel(const std::chrono::milliseconds interval, const hi // Clock rate is in kHz => number of clock ticks in a millisecond HIP_CHECK(hipDeviceGetAttribute(&ticks_per_ms, hipDeviceAttributeClockRate, 0)); Delay<<<1, 1, 0, stream>>>(interval.count(), ticks_per_ms); +} + +template +inline bool DeviceAttributesSupport(const int device, Attributes... attributes) { + constexpr auto DeviceAttributeSupport = [](const int device, + const hipDeviceAttribute_t attribute) { + int value = 0; + HIP_CHECK(hipDeviceGetAttribute(&value, attribute, device)); + return value; + }; + return (... && DeviceAttributeSupport(device, attributes)); } \ No newline at end of file From 736eff80d406e5335648a40f45c437ff96496c67 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Tue, 4 Oct 2022 09:07:56 +0200 Subject: [PATCH 04/24] EXSWHTEC-83 - Implement new and reimplement existing tests for hipMemAdvise --- tests/catch/unit/memory/CMakeLists.txt | 5 +- tests/catch/unit/memory/hipMemAdvise.cc | 1128 ++++--------------- tests/catch/unit/memory/hipMemAdvise_old.cc | 962 ++++++++++++++++ 3 files changed, 1190 insertions(+), 905 deletions(-) create mode 100644 tests/catch/unit/memory/hipMemAdvise_old.cc diff --git a/tests/catch/unit/memory/CMakeLists.txt b/tests/catch/unit/memory/CMakeLists.txt index f24c63ad8c..ed4bc92eb7 100644 --- a/tests/catch/unit/memory/CMakeLists.txt +++ b/tests/catch/unit/memory/CMakeLists.txt @@ -97,6 +97,7 @@ set(TEST_SRC hipMemsetSync.cc hipMemsetAsync.cc hipMemAdvise.cc + hipMemAdvise_old.cc ) else() set(TEST_SRC @@ -170,6 +171,7 @@ set(TEST_SRC hipMemsetSync.cc hipMemsetAsync.cc hipMemAdvise.cc + hipMemAdvise_old.cc ) endif() @@ -183,5 +185,4 @@ endif() hip_add_exe_to_target(NAME MemoryTest TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests - COMPILE_OPTIONS -std=c++14) + TEST_TARGET_NAME build_tests) diff --git a/tests/catch/unit/memory/hipMemAdvise.cc b/tests/catch/unit/memory/hipMemAdvise.cc index 96a0318bb8..30bd744e45 100644 --- a/tests/catch/unit/memory/hipMemAdvise.cc +++ b/tests/catch/unit/memory/hipMemAdvise.cc @@ -1,962 +1,284 @@ /* -Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -/* Test Case Description: - Scenario-1: The following Function Tests the working of flags which can be - assigned to HMM memory using hipMemAdvise() api - Scenario-2: Negative tests on hipMemAdvise() api - Scenario-3: The following function tests various scenarios around the flag - 'hipMemAdviseSetPreferredLocation' using HMM memory and hipMemAdvise() api - Scenario-4: The following function tests various scenarios around the flag - 'hipMemAdviseSetReadMostly' using HMM memory and hipMemAdvise() api - Scenario-5: The following function verifies if assigning of a flag - invalidates the earlier flag which was assigned to the same memory region - using hipMemAdvise() - Scenario-6: The following function tests if peers can set - hipMemAdviseSetAccessedBy flag - on HMM memory prefetched on each of the other gpus - Scenario-7: Set AccessedBy flag and check value returned by - hipMemRangeGetAttribute() It should be -2(same is observed on cuda) - Scenario-8: Set AccessedBy flag to device 0 on Hmm memory and prefetch the - memory to device 1, then probe for AccessedBy flag using - hipMemRangeGetAttribute() we should still see the said flag is set for - device 0 - Scenario-9: 1) Set AccessedBy to device 0 followed by PreferredLocation to - device 1 check for AccessedBy flag using hipMemRangeGetAttribute() it should - return 0 - 2) Unset AccessedBy to 0 and set it to device 1 followed by - PreferredLocation to device 1, check for AccessedBy flag using - hipMemRangeGetAttribute() it should return 1 - Scenario-10: Set AccessedBy flag to HMM memory launch a kernel and then unset - AccessedBy, launch kernel. We should not have any access issues - Scenario-11: Allocate memory using aligned_alloc(), assign PreferredLocation - flag to the allocated memory and launch a kernel. Kernel should get executed - successfully without hang or segfault - Scenario-12: Allocate Hmm memory, set advise to PreferredLocation and then - get attribute using the api hipMemRangeGetAttribute() for - hipMemRangeAttributeLastPrefetchLocation the value returned should be -2 - Scenario-13: Allocate HMM memory, set PreferredLocation to device 0, Prfetch - the mem to device1, probe for hipMemRangeAttributeLastPrefetchLocation using - hipMemRangeGetAttribute(), we should get 1 - Scenario-14: Allocate HMM memory, set ReadMostly followed by - PreferredLocation, probe for hipMemRangeAttributeReadMostly and - hipMemRangeAttributePreferredLocation - using hipMemRangeGetAttribute() we should observe 1 and 0 correspondingly. - In other words setting of hipMemRangeAttributePreferredLocation should not - impact hipMemRangeAttributeReadMostly advise to the memory - Scenario-15: Allocate Hmm memory, advise it to ReadMostly for gpu: 0 and - launch kernel on all other gpus except 0. This test case may discover any - effect or access denial case arising due to setting ReadMostly only to a - particular gpu +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. */ #include -#if __linux__ -#include -#include -#include -#endif - -// Kernel function -__global__ void MemAdvseKernel(int n, int *x) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < n) - x[index] = x[index] * x[index]; -} - -// Kernel -__global__ void MemAdvise2(int *Hmm, int n) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - int stride = blockDim.x * gridDim.x; - for (int i = index; i < n; i += stride) { - Hmm[i] = Hmm[i] + 10; +#include +#include +#include + +static inline hipMemoryAdvise GetUnsetMemAdvice(const hipMemoryAdvise advice) { + switch (advice) { + case hipMemAdviseSetAccessedBy: + return hipMemAdviseUnsetAccessedBy; + case hipMemAdviseSetReadMostly: + return hipMemAdviseUnsetReadMostly; + case hipMemAdviseSetPreferredLocation: + return hipMemAdviseUnsetPreferredLocation; + default: + assert("Invalid hipMemoryAdvise enumerator"); } } -// Kernel -__global__ void MemAdvise3(int *Hmm, int *Hmm1, int n) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - int stride = blockDim.x * gridDim.x; - for (int i = index; i < n; i += stride) { - Hmm1[i] = Hmm[i] + 10; - } -} - - -static bool CheckError(hipError_t err, int LineNo) { - if (err == hipSuccess) { - WARN("Error expected but received hipSuccess at line no.:" << LineNo); - return false; - } else { - return true; +static inline hipMemRangeAttribute GetMemAdviceAttr(const hipMemoryAdvise advice) { + switch (advice) { + case hipMemAdviseSetAccessedBy: + return hipMemRangeAttributeAccessedBy; + case hipMemAdviseSetReadMostly: + return hipMemRangeAttributeReadMostly; + case hipMemAdviseSetPreferredLocation: + return hipMemRangeAttributePreferredLocation; + default: + assert("Invalid hipMemoryAdvise enumerator"); } } -static int HmmAttrPrint() { - int managed = 0; - WARN("The following are the attribute values related to HMM for" - " device 0:\n"); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributeDirectManagedMemAccessFromHost, 0)); - WARN("hipDeviceAttributeDirectManagedMemAccessFromHost: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributeConcurrentManagedAccess, 0)); - WARN("hipDeviceAttributeConcurrentManagedAccess: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributePageableMemoryAccess, 0)); - WARN("hipDeviceAttributePageableMemoryAccess: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributePageableMemoryAccessUsesHostPageTables, 0)); - WARN("hipDeviceAttributePageableMemoryAccessUsesHostPageTables:" << managed); - - HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, - 0)); - WARN("hipDeviceAttributeManagedMemory: " << managed); - return managed; -} - - -// The following Function Tests the working of flags which can be assigned -// to HMM memory using hipMemAdvise() api -TEST_CASE("Unit_hipMemAdvise_TstFlags") { - int MangdMem = HmmAttrPrint(); - if (MangdMem == 1) { - bool IfTestPassed = true; - int NumDevs = 0, *Outpt = nullptr; - int MEM_SIZE = 4*1024, A_CONST = 9999; - float *Hmm = nullptr; - int AttrVal = 0; - HIP_CHECK(hipGetDeviceCount(&NumDevs)); - Outpt = new int(NumDevs); - HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE * 2, hipMemAttachGlobal)); - // With the following for loop we iterate through each of the Gpus in the - // system set and unset the flags and check the behavior. - for (int i = 0; i < NumDevs; ++i) { - HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, hipMemAdviseSetReadMostly, i)); - HIP_CHECK(hipMemRangeGetAttribute(&AttrVal, sizeof(AttrVal), - hipMemRangeAttributeReadMostly, Hmm, - MEM_SIZE * 2)); - if (AttrVal != 1) { - WARN("Attempt to set hipMemAdviseSetReadMostly flag failed!\n"); - IfTestPassed = false; - } - HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, hipMemAdviseUnsetReadMostly, - i)); - - HIP_CHECK(hipMemRangeGetAttribute(&AttrVal, sizeof(AttrVal), - hipMemRangeAttributeReadMostly, Hmm, - (MEM_SIZE * 2))); - if (AttrVal != 0) { - WARN("Attempt to Unset hipMemAdviseSetReadMostly flag failed!\n"); - IfTestPassed = false; - } - AttrVal = A_CONST; - // Currently hipMemAdviseSetPreferredLocation and - // hipMemAdviseSetAccessedBy - // flags are resulting in issues: SWDEV-267357 - HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, - hipMemAdviseSetPreferredLocation, i)); - HIP_CHECK(hipMemRangeGetAttribute(&AttrVal, sizeof(AttrVal), - hipMemRangeAttributePreferredLocation, - Hmm, (MEM_SIZE * 2))); - if (AttrVal != i) { - WARN("Attempt to set hipMemAdviseSetPreferredLocation flag failed!\n"); - IfTestPassed = false; - } - AttrVal = A_CONST; - HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, - hipMemAdviseUnsetPreferredLocation, i)); - HIP_CHECK(hipMemRangeGetAttribute(&AttrVal, sizeof(AttrVal), - hipMemRangeAttributePreferredLocation, - Hmm, (MEM_SIZE * 2))); - if (AttrVal == i) { - WARN("Attempt to Unset hipMemAdviseUnsetPreferredLocation "); - WARN("flag failed!\n"); - IfTestPassed = false; - } - for (int m = 0; m < NumDevs; ++m) { - Outpt[m] = A_CONST; - } - HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, hipMemAdviseSetAccessedBy, i)); - HIP_CHECK(hipMemRangeGetAttribute(Outpt, sizeof(Outpt), - hipMemRangeAttributeAccessedBy, Hmm, - (MEM_SIZE * 2))); - if ((Outpt[0]) != i) { - WARN("Attempt to set hipMemAdviseSetAccessedBy flag failed!\n"); - IfTestPassed = false; - } - for (int m = 0; m < NumDevs; ++m) { - Outpt[m] = A_CONST; - } - HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, hipMemAdviseUnsetAccessedBy, - i)); - HIP_CHECK(hipMemRangeGetAttribute(Outpt, sizeof(Outpt), - hipMemRangeAttributeAccessedBy, Hmm, - (MEM_SIZE * 2))); - if ((Outpt[0]) >= 0) { - WARN("Attempt to Unset hipMemAdviseUnsetAccessedBy flag failed!\n"); - IfTestPassed = false; - } +std::vector GetDevicesWithAdviseSupport() { + const auto device_count = HipTest::getDeviceCount(); + std::vector supported_devices; + supported_devices.reserve(device_count + 1); + for (int i = 0; i < device_count; ++i) { + if (DeviceAttributesSupport(i, hipDeviceAttributeManagedMemory, + hipDeviceAttributeConcurrentManagedAccess)) { + supported_devices.push_back(i); } - delete [] Outpt; - HIP_CHECK(hipFree(Hmm)); - REQUIRE(IfTestPassed); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); } + return supported_devices; } -TEST_CASE("Unit_hipMemAdvise_NegtveTsts") { - int MangdMem = HmmAttrPrint(); - if (MangdMem == 1) { - bool IfTestPassed = true; - int NumDevs = 0, MEM_SIZE = 4*1024; - float *Hmm = nullptr; - std::string str; - HIP_CHECK(hipGetDeviceCount(&NumDevs)); - HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE * 2, hipMemAttachGlobal)); -#if HT_AMD - // Passing invalid value(99) device param - IfTestPassed &= CheckError(hipMemAdvise(Hmm, MEM_SIZE * 2, - hipMemAdviseSetReadMostly, 99), __LINE__); - - // Passing invalid value(-12) device param - IfTestPassed &= CheckError(hipMemAdvise(Hmm, MEM_SIZE * 2, - hipMemAdviseSetReadMostly, -12), __LINE__); -#endif - // Passing NULL as first parameter instead of valid pointer to a memory - IfTestPassed &= CheckError(hipMemAdvise(NULL, MEM_SIZE * 2, - hipMemAdviseSetReadMostly, 0), __LINE__); - - // Passing 0 for count(2nd param) parameter - IfTestPassed &= CheckError(hipMemAdvise(Hmm, 0, hipMemAdviseSetReadMostly, - 0), __LINE__); - - // Passing count much more than actually allocated value - IfTestPassed &= CheckError(hipMemAdvise(Hmm, MEM_SIZE * 6, - hipMemAdviseSetReadMostly, 0), __LINE__); - - REQUIRE(IfTestPassed); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); +TEST_CASE("Unit_hipMemAdvise_Set_Unset_Basic") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); + return; } + supported_devices.push_back(hipCpuDeviceId); + const auto device = GENERATE_COPY(from_range(supported_devices)); + + const auto SetUnset = [=](const hipMemoryAdvise advice) { + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); + int32_t attribute = 0u; + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, advice, device)); + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), GetMemAdviceAttr(advice), + alloc.ptr(), kPageSize)); + REQUIRE((advice == hipMemAdviseSetReadMostly ? 1 : device) == attribute); + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, GetUnsetMemAdvice(advice), device)); + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), GetMemAdviceAttr(advice), + alloc.ptr(), kPageSize)); + REQUIRE((advice == hipMemAdviseSetReadMostly ? 0 : hipInvalidDeviceId) == attribute); + }; + + SECTION("hipMemAdviseSetAccessedBy") { SetUnset(hipMemAdviseSetAccessedBy); } + SECTION("hipMemAdviseSetReadMostly") { SetUnset(hipMemAdviseSetReadMostly); } + SECTION("hipMemAdviseSetPreferredLocation") { SetUnset(hipMemAdviseSetPreferredLocation); } } -// The following function tests various scenarios around the flag -// 'hipMemAdviseSetPreferredLocation' using HMM memory and hipMemAdvise() api -TEST_CASE("Unit_hipMemAdvise_PrefrdLoc") { - int MangdMem = HmmAttrPrint(); - if (MangdMem == 1) { - // Check that when a page fault occurs for the memory region set to devPtr, - // the data is migrated to the destn processor - int MEM_SIZE = 4096, A_CONST = 9999; - int *Hmm = nullptr, NumDevs = 0, dev = A_CONST; - bool IfTestPassed = true; - HIP_CHECK(hipGetDeviceCount(&NumDevs)); - HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE * 3, hipMemAttachGlobal)); - for (int i = 0; i < ((MEM_SIZE * 3)/4); ++i) { - Hmm[i] = 4; - } - for (int devId = 0; devId < NumDevs; ++devId) { - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE * 3, - hipMemAdviseSetPreferredLocation, devId)); - int NumElms = ((MEM_SIZE * 3)/4); - MemAdvseKernel<<>>(NumElms, Hmm); - int dev = A_CONST; - HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(dev), - hipMemRangeAttributePreferredLocation, - Hmm, MEM_SIZE * 3)); - if (dev != devId) { - WARN("Memory observed to be not available on expected location\n"); - WARN("line no: " << __LINE__); - WARN("dev: " << dev); - IfTestPassed = false; - } - } - - // Check that when preferred location is set for a memory region, - // data can still be prefetched using hipMemPrefetchAsync - hipStream_t strm; - dev = A_CONST; - for (int devId = 0; devId < NumDevs; ++devId) { - HIP_CHECK(hipSetDevice(devId)); - HIP_CHECK(hipStreamCreate(&strm)); - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE * 3, - hipMemAdviseSetPreferredLocation, devId)); - HIP_CHECK(hipMemPrefetchAsync(Hmm, MEM_SIZE * 3, devId, strm)); - HIP_CHECK(hipStreamSynchronize(strm)); - HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(dev), - hipMemRangeAttributeLastPrefetchLocation, - Hmm, MEM_SIZE * 3)); - if (dev != devId) { - WARN("Memory reported to be not available at the Prefetched "); - WARN("location with device id: " << devId); - WARN("line no: " << __LINE__); - WARN("dev: " << dev); - IfTestPassed = false; - } - HIP_CHECK(hipStreamDestroy(strm)); - } - HIP_CHECK(hipFree(Hmm)); - REQUIRE(IfTestPassed); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); +TEST_CASE("Unit_hipMemAdvise_No_Flag_Interference") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); + return; } -} - -// The following function tests various scenarios around the flag -// 'hipMemAdviseSetReadMostly' using HMM memory and hipMemAdvise() api - -TEST_CASE("Unit_hipMemAdvise_ReadMostly") { - int MangdMem = HmmAttrPrint(); - if (MangdMem == 1) { - bool IfTestPassed = true; - int MEM_SIZE = 4096, A_CONST = 9999; - float *Hmm = nullptr; - HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE)); - for (uint64_t i = 0; i < (MEM_SIZE/sizeof(float)); ++i) { - Hmm[i] = A_CONST; - } - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetReadMostly, 0)); - // Checking if the data can be read after setting hipMemAdviseSetReadMostly - for (uint64_t i = 0; i < (MEM_SIZE/sizeof(float)); ++i) { - if (Hmm[i] != A_CONST) { - WARN("Didn't find expected value in Hmm memory after setting"); - WARN(" hipMemAdviseSetReadMostly flag line no.: " << __LINE__); - IfTestPassed = false; - } - } + supported_devices.push_back(hipCpuDeviceId); + const auto device = GENERATE_COPY(from_range(supported_devices)); - // Checking if the memory region can be modified - for (uint64_t i = 0; i < (MEM_SIZE/sizeof(float)); ++i) { - Hmm[i] = A_CONST; - } + std::array advice{hipMemAdviseSetReadMostly, hipMemAdviseSetPreferredLocation, + hipMemAdviseSetAccessedBy}; + for (int i = 0; i < 6; ++i) { + std::next_permutation(std::begin(advice), std::end(advice)); + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); - for (uint64_t i = 0; i < (MEM_SIZE/sizeof(float)); ++i) { - if (Hmm[i] != A_CONST) { - WARN("Didn't find expected value in Hmm memory after Modification\n"); - WARN("line no.: " << __LINE__); - IfTestPassed = false; - } + for (const auto a : advice) { + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, a, device)); } - int out = A_CONST; - HIP_CHECK(hipMemRangeGetAttribute(&out, 4, hipMemRangeAttributeReadMostly, - Hmm, MEM_SIZE)); - if (out != 1) { - WARN("out value: " << out); - IfTestPassed = false; - } - // Checking the advise attribute after prefetch - HIP_CHECK(hipMemPrefetchAsync(Hmm, MEM_SIZE, 0, 0)); - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipMemRangeGetAttribute(&out, sizeof(int), - hipMemRangeAttributeReadMostly, Hmm, - MEM_SIZE)); - if (out != 1) { - WARN("Attribute assigned to memory changed after calling "); - WARN("hipMemPrefetchAsync(). line no.: " << __LINE__); - WARN("out value: " << out); - IfTestPassed = false; + for (const auto a : advice) { + auto attribute = 0u; + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), GetMemAdviceAttr(a), + alloc.ptr(), kPageSize)); + REQUIRE((a == hipMemAdviseSetReadMostly ? 1 : device) == attribute); } - HIP_CHECK(hipFree(Hmm)); - REQUIRE(IfTestPassed); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); } } -// The following function verifies if assigning of a flag invalidates the -// earlier flag which was assigned to the same memory region using -// hipMemAdvise() -TEST_CASE("Unit_hipMemAdvise_TstFlgOverrideEffect") { - int MangdMem = HmmAttrPrint(); - if (MangdMem == 1) { - bool IfTestPassed = true; - int MEM_SIZE = 4*4096, A_CONST = 9999; - float *Hmm = nullptr; - int NumDevs = 0, dev = A_CONST; - - HIP_CHECK(hipGetDeviceCount(&NumDevs)); - HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE, hipMemAttachGlobal)); - for (int i = 0; i < NumDevs; ++i) { - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetReadMostly, i)); - HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(int), - hipMemRangeAttributeReadMostly, Hmm, - MEM_SIZE)); - if (dev != 1) { - WARN("hipMemAdviseSetReadMostly flag did not take affect despite "); - WARN("setting it using hipMemAdvise(). line no.: " << __LINE__); - IfTestPassed = false; - break; - } - dev = A_CONST; - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetPreferredLocation, - i)); - HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(int), - hipMemRangeAttributePreferredLocation, - Hmm, MEM_SIZE)); - if (dev != i) { - WARN("hipMemAdviseSetPreferredLocation flag did not take affect "); - WARN("despite setting it using hipMemAdvise()\n"); - WARN("line no.: " << __LINE__); - IfTestPassed = false; - break; - } - - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetAccessedBy, i)); - dev = A_CONST; - HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(int), - hipMemRangeAttributeAccessedBy, Hmm, - MEM_SIZE)); - if (dev != i) { - WARN("hipMemAdviseSetAccessedBy flag did not take affect despite "); - WARN("setting it using hipMemAdvise(). line no.: " << __LINE__); - IfTestPassed = false; - break; - } - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseUnsetAccessedBy, i)); - } - HIP_CHECK(hipFree(Hmm)); - REQUIRE(IfTestPassed); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); +TEST_CASE("Unit_hipMemAdvise_Rounding") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); + return; } + supported_devices.push_back(hipCpuDeviceId); + const auto device = supported_devices.front(); + + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, 3 * kPageSize); + REQUIRE_FALSE(reinterpret_cast(alloc.ptr()) % kPageSize); + const auto [offset, width] = + GENERATE_COPY(std::make_pair(kPageSize / 4, kPageSize / 2), // Withing page + std::make_pair(kPageSize / 2, kPageSize), // Across page border + std::make_pair(kPageSize / 2, kPageSize * 2)); // Across two page borders + HIP_CHECK(hipMemAdvise(alloc.ptr() + offset, width, hipMemAdviseSetAccessedBy, device)); + constexpr auto RoundDown = [](const intptr_t a, const intptr_t n) { return a - a % n; }; + constexpr auto RoundUp = [RoundDown](const intptr_t a, const intptr_t n) { + return RoundDown(a + n - 1, n); + }; + const auto base = alloc.ptr(); + const auto rounded_up = RoundUp(offset + width, kPageSize); + unsigned int attribute = 0; + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), hipMemRangeAttributeAccessedBy, + reinterpret_cast(base), rounded_up)); + REQUIRE(device == attribute); + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), hipMemRangeAttributeAccessedBy, + alloc.ptr(), 3 * kPageSize)); + REQUIRE((rounded_up == 3 * kPageSize ? device : hipInvalidDeviceId) == attribute); } - -// The following function tests if peers can set hipMemAdviseSetAccessedBy flag -// on HMM memory prefetched on each of the other gpus -#if HT_AMD -TEST_CASE("Unit_hipMemAdvise_TstAccessedByPeer") { - int MangdMem = HmmAttrPrint(); - if (MangdMem == 1) { - bool IfTestPassed = true; - int *Hmm = nullptr, MEM_SIZE = 4*4096, A_CONST = 9999;; - int NumDevs = 0, CanAccessPeer = A_CONST, flag = 0; - - HIP_CHECK(hipGetDeviceCount(&NumDevs)); - if (NumDevs < 2) { - SUCCEED("Test TestSetAccessedByPeer() need atleast two Gpus to test" - " the scenario. This system has GPUs less than 2"); - } - HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE, hipMemAttachGlobal)); - for (int i = 0; i < NumDevs; ++i) { - HIP_CHECK(hipMemPrefetchAsync(Hmm, MEM_SIZE, i, 0)); - for (int j = 0; j < NumDevs; ++j) { - if (i == j) - continue; - HIP_CHECK(hipSetDevice(j)); - HIP_CHECK(hipDeviceCanAccessPeer(&CanAccessPeer, j, i)); - if (CanAccessPeer) { - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetAccessedBy, j)); - for (uint64_t m = 0; m < (MEM_SIZE/sizeof(int)); ++m) { - Hmm[m] = 4; - } - HIP_CHECK(hipDeviceEnablePeerAccess(i, 0)); - MemAdvseKernel<<<(MEM_SIZE/sizeof(int)/32), 32>>>( - (MEM_SIZE/sizeof(int)), Hmm); - HIP_CHECK(hipDeviceSynchronize()); - // Verifying the result - for (uint64_t m = 0; m < (MEM_SIZE/sizeof(int)); ++m) { - if (Hmm[m] != 16) { - flag = 1; - } - } - if (flag) { - WARN("Didnt get Expected results with device: " << j); - WARN("line no.: " << __LINE__); - IfTestPassed = false; - flag = 0; - } - } - } - } - HIP_CHECK(hipFree(Hmm)); - REQUIRE(IfTestPassed); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); - } -} -#endif - - -/* Set AccessedBy flag and check value returned by hipMemRangeGetAttribute() - It should be -2(same is observed on cuda)*/ -TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, data = 999; - HIP_CHECK(hipMallocManaged(&Hmm, 2*4096)); - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetAccessedBy, 0)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributeLastPrefetchLocation, - Hmm, 2*4096)); - if (data != -2) { - WARN("Didnt get expected value!!\n"); - REQUIRE(false); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); +TEST_CASE("Unit_hipMemAdvise_Flags_Do_Not_Cause_Prefetch") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); } + supported_devices.push_back(hipCpuDeviceId); + + const auto Test = [](const int device, const hipMemoryAdvise advice) { + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, hipMemAdviseSetPreferredLocation, device)); + int32_t attribute = 0u; + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), + hipMemRangeAttributeLastPrefetchLocation, alloc.ptr(), + kPageSize)); + REQUIRE(attribute == hipInvalidDeviceId); + }; + const auto device = + GENERATE_COPY(from_range(std::begin(supported_devices), std::end(supported_devices))); + + SECTION("hipMemAdviseSetPreferredLocation") { Test(device, hipMemAdviseSetPreferredLocation); } + SECTION("hipMemAdviseSetAccessedBy") { Test(device, hipMemAdviseSetAccessedBy); } } -/* Set AccessedBy flag to device 0 on Hmm memory and prefetch the memory to - device 1, then probe for AccessedBy flag using hipMemRangeGetAttribute() - we should still see the said flag is set for device 0*/ -TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg2") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, data = 999, Ngpus = 0; - HIP_CHECK(hipGetDeviceCount(&Ngpus)); - if (Ngpus >= 2) { - hipStream_t strm; - HIP_CHECK(hipStreamCreate(&strm)); - HIP_CHECK(hipMallocManaged(&Hmm, 2*4096)); - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetAccessedBy, 0)); - HIP_CHECK(hipMemPrefetchAsync(Hmm, 2*4096, 1, strm)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributeAccessedBy, Hmm, 2*4096)); - if (data != 0) { - WARN("Didnt get expected behavior at line: " << __LINE__); - REQUIRE(false); - } - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseUnsetAccessedBy, 0)); - HIP_CHECK(hipStreamDestroy(strm)); - HIP_CHECK(hipFree(Hmm)); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); +TEST_CASE("Unit_hipMemAdvise_Read_Write_After_Advise") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); } + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); + constexpr size_t count = kPageSize / sizeof(*alloc.ptr()); + + const auto ReadWriteManagedMemory = [&](const int device, const hipMemoryAdvise advice) { + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, advice, device)); + + std::fill_n(alloc.ptr(), count, -1); + ArrayFindIfNot(alloc.ptr(), -1, count); + for (int i = 0; i < supported_devices.size(); ++i) { + HIP_CHECK(hipSetDevice(supported_devices[i])); + VectorIncrement<<>>(alloc.ptr(), 1, count); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + ArrayFindIfNot(alloc.ptr(), i, count); + } + + int32_t attribute = 0u; + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), GetMemAdviceAttr(advice), + alloc.ptr(), kPageSize)); + REQUIRE((advice == hipMemAdviseSetReadMostly ? 1 : device) == attribute); + }; + + SECTION("ReadMostly") { ReadWriteManagedMemory(hipInvalidDeviceId, hipMemAdviseSetReadMostly); } + supported_devices.push_back(hipCpuDeviceId); + const auto device = + GENERATE_COPY(from_range(std::begin(supported_devices), std::end(supported_devices))); + supported_devices.pop_back(); + SECTION("PreferredLocation") { ReadWriteManagedMemory(device, hipMemAdviseSetPreferredLocation); } + SECTION("AccessedBy") { ReadWriteManagedMemory(device, hipMemAdviseSetAccessedBy); } } - - -/* 1) Set AccessedBy to device 0 followed by PreferredLocation to device 1 - check for AccessedBy flag using hipMemRangeGetAttribute() it should - return 0 - 2) Unset AccessedBy to 0 and set it to device 1 followed by - PreferredLocation to device 1, check for AccessedBy flag using - hipMemRangeGetAttribute() it should return 1*/ - -TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg3") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, data = 999, Ngpus = 0; - HIP_CHECK(hipGetDeviceCount(&Ngpus)); - if (Ngpus >= 2) { - HIP_CHECK(hipMallocManaged(&Hmm, 2*4096)); - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetAccessedBy, 0)); - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetPreferredLocation, 1)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributeAccessedBy, Hmm, 2*4096)); - if (data != 0) { - WARN("Didnt get expected behavior at line: " << __LINE__); - REQUIRE(false); - } - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseUnsetAccessedBy, 0)); - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetAccessedBy, 1)); - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetPreferredLocation, 0)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributeAccessedBy, Hmm, 2*4096)); - if (data != 1) { - WARN("Didnt get expected behavior at line: " << __LINE__); - REQUIRE(false); - } - HIP_CHECK(hipFree(Hmm)); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); +TEST_CASE("Unit_hipMemAdvise_Prefetch_After_Advise") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); } -} - - -/* Set AccessedBy flag to HMM memory launch a kernel and then unset - AccessedBy, launch kernel. We should not have any access issues*/ - -TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg4") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, NumElms = (1024 * 1024), InitVal = 123, blockSize = 64; - int DataMismatch = 0; - hipStream_t strm; - HIP_CHECK(hipStreamCreate(&strm)); - HIP_CHECK(hipMallocManaged(&Hmm, (NumElms * sizeof(int)))); - HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), - hipMemAdviseSetAccessedBy, 0)); - // Initializing memory - for (int i = 0; i < NumElms; ++i) { - Hmm[i] = InitVal; - } - dim3 dimBlock(blockSize, 1, 1); - dim3 dimGrid((NumElms + blockSize -1)/blockSize, 1, 1); - // launching kernel from each one of the gpus - MemAdvise2<<>>(Hmm, NumElms); - HIP_CHECK(hipStreamSynchronize(strm)); - - // verifying the final result - for (int i = 0; i < NumElms; ++i) { - if (Hmm[i] != (InitVal + 10)) { - DataMismatch++; - } - } - - if (DataMismatch != 0) { - WARN("DataMismatch is observed at line: " << __LINE__); - REQUIRE(false); - } - - HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), - hipMemAdviseUnsetAccessedBy, 0)); - MemAdvise2<<>>(Hmm, NumElms); - HIP_CHECK(hipStreamSynchronize(strm)); - // verifying the final result - for (int i = 0; i < NumElms; ++i) { - if (Hmm[i] != (InitVal + (2*10))) { - DataMismatch++; - } - } - - if (DataMismatch != 0) { - WARN("DataMismatch is observed at line: " << __LINE__); - REQUIRE(false); - } - HIP_CHECK(hipFree(Hmm)); - HIP_CHECK(hipStreamDestroy(strm)); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); + supported_devices.push_back(hipCpuDeviceId); + const auto advice = GENERATE(hipMemAdviseSetAccessedBy, hipMemAdviseSetReadMostly, + hipMemAdviseSetPreferredLocation); + const auto device = GENERATE_COPY(from_range(supported_devices)); + + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, advice, device)); + + for (const auto d : supported_devices) { + HIP_CHECK(hipMemPrefetchAsync(alloc.ptr(), kPageSize, d)); + HIP_CHECK(hipStreamSynchronize(nullptr)); + int32_t attribute = 0; + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), + hipMemRangeAttributeLastPrefetchLocation, alloc.ptr(), + kPageSize)); + REQUIRE(d == attribute); } -} + int32_t attribute = 0; + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), GetMemAdviceAttr(advice), + alloc.ptr(), kPageSize)); + REQUIRE((advice == hipMemAdviseSetReadMostly ? 1 : device) == attribute); +} -/* Allocate memory using aligned_alloc(), assign PreferredLocation flag to - the allocated memory and launch a kernel. Kernel should get executed - successfully without hang or segfault*/ -#if __linux__ && HT_AMD -TEST_CASE("Unit_hipMemAdvise_TstAlignedAllocMem") { - if ((setenv("HSA_XNACK", "1", 1)) != 0) { - WARN("Unable to turn on HSA_XNACK, hence terminating the Test case!"); - REQUIRE(false); - } - // The following code block is used to check for gfx906/8 so as to skip if - // any of the gpus available - int fd1[2]; // Used to store two ends of first pipe - pid_t p; - if (pipe(fd1) == -1) { - fprintf(stderr, "Pipe Failed"); - REQUIRE(false); +TEST_CASE("Unit_hipMemAdvise_AccessedBy_All_Devices") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); + return; } + supported_devices.push_back(hipCpuDeviceId); - /* GpuId[0] for gfx906 exists--> 1 for yes and 0 for no - GpuId[0] for gfx908 exists--> 1 for yes and 0 for no*/ - int GpuId[2] = {0, 0}; - p = fork(); - - if (p < 0) { - fprintf(stderr, "fork Failed"); - REQUIRE(false); - } else if (p > 0) { // parent process - close(fd1[1]); // Close writing end of first pipe - // Wait for child to send a string - wait(NULL); - // Read string from child and close reading end. - read(fd1[0], GpuId, 2 * sizeof(int)); - close(fd1[0]); - if ((GpuId[0] == 1) || (GpuId[0] == 1)) { - WARN("This test is not applicable on MI60 & MI100." - "Skipping the test!!"); - exit(0); - } - } else { // child process - close(fd1[0]); // Close read end of first pipe - hipDeviceProp_t prop; - HIPCHECK(hipGetDeviceProperties(&prop, 0)); - char *p = NULL; - p = strstr(prop.gcnArchName, "gfx906"); - if (p) { - WARN("gfx906 gpu found on this system!!"); - GpuId[0] = 1; - } - p = strstr(prop.gcnArchName, "gfx908"); - if (p) { - WARN("gfx908 gpu found on this system!!"); - GpuId[1] = 1; - } - // Write concatenated string and close writing end - write(fd1[1], GpuId, 2 * sizeof(int)); - close(fd1[1]); - exit(0); - } - int stat = 0; - if (fork() == 0) { - // The below part should be inside fork - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Mllc = nullptr, MemSz = 4096 * 4, NumElms = 4096, InitVal = 123; - // Mllc = reinterpret_cast<(int *)>(aligned_alloc(4096, MemSz)); - Mllc = reinterpret_cast(aligned_alloc(4096, 4096*4)); - for (int i = 0; i < NumElms; ++i) { - Mllc[i] = InitVal; - } - hipStream_t strm; - int DataMismatch = 0; - HIP_CHECK(hipStreamCreate(&strm)); - // The following hipMemAdvise() call is made to know if advise on - // aligned_alloc() is causing any issue - HIP_CHECK(hipMemAdvise(Mllc, MemSz, hipMemAdviseSetPreferredLocation, 0)); - HIP_CHECK(hipMemPrefetchAsync(Mllc, MemSz, 0, strm)); - HIP_CHECK(hipStreamSynchronize(strm)); - MemAdvise2<<<(NumElms/32), 32, 0, strm>>>(Mllc, NumElms); - HIP_CHECK(hipStreamSynchronize(strm)); - for (int i = 0; i < NumElms; ++i) { - if (Mllc[i] != (InitVal + 10)) { - DataMismatch++; - } - } - if (DataMismatch != 0) { - WARN("DataMismatch observed!!"); - exit(9); // 9 for failure - } else { - exit(10); // 10 for Pass result - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); - } - } else { - wait(&stat); - int Result = WEXITSTATUS(stat); - if (Result != 10) { - REQUIRE(false); - } - } -} -#endif - -/* Allocate Hmm memory, set advise to PreferredLocation and then get - attribute using the api hipMemRangeGetAttribute() for - hipMemRangeAttributeLastPrefetchLocation the value returned should be -2*/ - -TEST_CASE("Unit_hipMemAdvise_TstMemAdvisePrefrdLoc") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, data = 999; - HIP_CHECK(hipMallocManaged(&Hmm, 4096)); - HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetPreferredLocation, 0)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributeLastPrefetchLocation, - Hmm, 4096)); - if (data != -2) { - WARN("Didnt receive expected value."); - REQUIRE(false); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributePageableMemoryAccess " - "attribute. Hence skipping the testing with Pass result.\n"); + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); + for (const auto device : supported_devices) { + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, hipMemAdviseSetAccessedBy, device)); } + std::vector accessed_by(supported_devices.size(), hipInvalidDeviceId); + HIP_CHECK(hipMemRangeGetAttribute(accessed_by.data(), sizeof(accessed_by.data()), + hipMemRangeAttributeAccessedBy, alloc.ptr(), kPageSize)); + REQUIRE_THAT(accessed_by, Catch::Matchers::Equals(supported_devices)); } - -/* Allocate HMM memory, set PreferredLocation to device 0, Prfetch the mem - to device1, probe for hipMemRangeAttributeLastPrefetchLocation using - hipMemRangeGetAttribute(), we should get 1*/ - -TEST_CASE("Unit_hipMemAdvise_TstMemAdviseLstPreftchLoc") { - int NumDevs = 0; - HIP_CHECK(hipGetDeviceCount(&NumDevs)); - if (NumDevs >= 2) { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, data = 999; - hipStream_t strm; - HIP_CHECK(hipSetDevice(1)); - HIP_CHECK(hipStreamCreate(&strm)); - HIP_CHECK(hipMallocManaged(&Hmm, 4096)); - HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetPreferredLocation, 0)); - HIP_CHECK(hipMemPrefetchAsync(Hmm, 4096, 1, strm)); - HIP_CHECK(hipStreamSynchronize(strm)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributeLastPrefetchLocation, - Hmm, 4096)); - if (data != 1) { - WARN("Didnt receive expected value!!"); - REQUIRE(false); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributePageableMemoryAccess " - "attribute. Hence skipping the testing with Pass result.\n"); - } - } else { - SUCCEED("This system has less than 2 gpus hence skipping the test.\n"); +TEST_CASE("Unit_hipMemAdvise_Negative_Parameters") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); } -} + const auto device = supported_devices.front(); + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); -/* Allocate HMM memory, set ReadMostly followed by PreferredLocation, probe - for hipMemRangeAttributeReadMostly and hipMemRangeAttributePreferredLocation - using hipMemRangeGetAttribute() we should observe 1 and 0 correspondingly. - In other words setting of hipMemRangeAttributePreferredLocation should not - impact hipMemRangeAttributeReadMostly advise to the memory*/ - -TEST_CASE("Unit_hipMemAdvise_TstMemAdviseMultiFlag") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, data = 999; - HIP_CHECK(hipMallocManaged(&Hmm, 4096)); - HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetReadMostly, 0)); - HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetPreferredLocation, 0)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributeReadMostly, Hmm, - 4096)); - if (data != 1) { - WARN("Didnt receive expected value at line: " << data); - REQUIRE(false); - } - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributePreferredLocation, Hmm, - 4096)); - if (data != 0) { - WARN("Didnt receive expected value at line: " << data); - REQUIRE(false); - } - HIP_CHECK(hipFree(Hmm)); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributePageableMemoryAccess " - "attribute. Hence skipping the testing with Pass result.\n"); + SECTION("Invalid advice") { + HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), kPageSize, static_cast(-1), device), + hipErrorInvalidValue); } -} - - - -/*Allocate Hmm memory, advise it to ReadMostly for gpu: 0 and launch kernel - on all other gpus except 0. This test case may discover any effect or - access denial case arising due to setting ReadMostly only to a particular - gpu*/ - -TEST_CASE("Unit_hipMemAdvise_ReadMosltyMgpuTst") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int Ngpus = 0; - HIP_CHECK(hipGetDeviceCount(&Ngpus)); - if (Ngpus < 2) { - SUCCEED("This test needs atleast two gpus to run." - "Hence skipping the test.\n"); - } - int *Hmm = NULL, NumElms = (1024 * 1024), InitVal = 123, blockSize = 64; - int *Hmm1 = NULL, DataMismatch = 0; - hipStream_t strm; - HIP_CHECK(hipStreamCreate(&strm)); - HIP_CHECK(hipMallocManaged(&Hmm, (NumElms * sizeof(int)))); - // Initializing memory - for (int i = 0; i < NumElms; ++i) { - Hmm[i] = InitVal; - } - HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), - hipMemAdviseSetReadMostly, 0)); - dim3 dimBlock(blockSize, 1, 1); - dim3 dimGrid((NumElms + blockSize -1)/blockSize, 1, 1); -#if HT_AMD - SECTION("Launch Kernel on all other gpus") { - // launching kernel from each one of the gpus - for (int i = 1; i < Ngpus; ++i) { - DataMismatch = 0; - HIP_CHECK(hipSetDevice(i)); - HIP_CHECK(hipMallocManaged(&Hmm1, (NumElms * sizeof(int)))); - MemAdvise3<<>>(Hmm, Hmm1, NumElms); - HIP_CHECK(hipStreamSynchronize(strm)); - // verifying the results - for (int j = 0; j < NumElms; ++j) { - if (Hmm1[j] != (InitVal + 10)) { - DataMismatch++; - } - } - if (DataMismatch != 0) { - WARN("DataMismatch is observed with the gpu: " << i); - REQUIRE(false); - } - HIP_CHECK(hipFree(Hmm1)); - } - } - - SECTION("Launch Kernel on all other gpus and manipulate the content") { - for (int i = 0; i < Ngpus; ++i) { - DataMismatch = 0; - HIP_CHECK(hipSetDevice(i)); - HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), - hipMemAdviseSetReadMostly, i)); - MemAdvise2<<>>(Hmm, NumElms); - HIP_CHECK(hipStreamSynchronize(strm)); - } - // verifying the final result - for (int i = 0; i < NumElms; ++i) { - if (Hmm[i] != (InitVal + Ngpus * 10)) { - DataMismatch++; - } - } - - if (DataMismatch != 0) { - WARN("DataMismatch is observed at line: " << __LINE__); - REQUIRE(false); - } - } -#endif - HIP_CHECK(hipFree(Hmm)); - HIP_CHECK(hipStreamDestroy(strm)); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributePageableMemoryAccess " - "attribute. Hence skipping the testing with Pass result.\n"); + const auto advice = GENERATE(hipMemAdviseSetAccessedBy, hipMemAdviseSetReadMostly, + hipMemAdviseSetPreferredLocation); + SECTION("dev_ptr == nullptr") { + HIP_CHECK_ERROR(hipMemAdvise(nullptr, kPageSize, advice, device), hipErrorInvalidValue); } -} - - -TEST_CASE("Unit_hipMemAdvise_TstSetUnsetPrfrdLoc") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, data = 999; - HIP_CHECK(hipMallocManaged(&Hmm, 2*4096)); - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetPreferredLocation, 0)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributePreferredLocation, Hmm, 2*4096)); - if (data != 0) { - WARN("Didnt receive expected value!!"); - REQUIRE(false); - } - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseUnsetPreferredLocation, 0)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributePreferredLocation, Hmm, 2*4096)); - if (data != -2) { - WARN("Didnt receive expected value!!"); - REQUIRE(false); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributePageableMemoryAccess " - "attribute. Hence skipping the testing with Pass result.\n"); + SECTION("dev_ptr pointing to non-managed memory") { + LinearAllocGuard alloc(LinearAllocs::hipMalloc, kPageSize); + HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), kPageSize, advice, device), hipErrorInvalidValue); } -} - + SECTION("Invalid device") { + HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), kPageSize, advice, hipInvalidDeviceId), + (advice == hipMemAdviseSetReadMostly ? hipSuccess : hipErrorInvalidDevice)); + } +} \ No newline at end of file diff --git a/tests/catch/unit/memory/hipMemAdvise_old.cc b/tests/catch/unit/memory/hipMemAdvise_old.cc new file mode 100644 index 0000000000..96a0318bb8 --- /dev/null +++ b/tests/catch/unit/memory/hipMemAdvise_old.cc @@ -0,0 +1,962 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* Test Case Description: + Scenario-1: The following Function Tests the working of flags which can be + assigned to HMM memory using hipMemAdvise() api + Scenario-2: Negative tests on hipMemAdvise() api + Scenario-3: The following function tests various scenarios around the flag + 'hipMemAdviseSetPreferredLocation' using HMM memory and hipMemAdvise() api + Scenario-4: The following function tests various scenarios around the flag + 'hipMemAdviseSetReadMostly' using HMM memory and hipMemAdvise() api + Scenario-5: The following function verifies if assigning of a flag + invalidates the earlier flag which was assigned to the same memory region + using hipMemAdvise() + Scenario-6: The following function tests if peers can set + hipMemAdviseSetAccessedBy flag + on HMM memory prefetched on each of the other gpus + Scenario-7: Set AccessedBy flag and check value returned by + hipMemRangeGetAttribute() It should be -2(same is observed on cuda) + Scenario-8: Set AccessedBy flag to device 0 on Hmm memory and prefetch the + memory to device 1, then probe for AccessedBy flag using + hipMemRangeGetAttribute() we should still see the said flag is set for + device 0 + Scenario-9: 1) Set AccessedBy to device 0 followed by PreferredLocation to + device 1 check for AccessedBy flag using hipMemRangeGetAttribute() it should + return 0 + 2) Unset AccessedBy to 0 and set it to device 1 followed by + PreferredLocation to device 1, check for AccessedBy flag using + hipMemRangeGetAttribute() it should return 1 + Scenario-10: Set AccessedBy flag to HMM memory launch a kernel and then unset + AccessedBy, launch kernel. We should not have any access issues + Scenario-11: Allocate memory using aligned_alloc(), assign PreferredLocation + flag to the allocated memory and launch a kernel. Kernel should get executed + successfully without hang or segfault + Scenario-12: Allocate Hmm memory, set advise to PreferredLocation and then + get attribute using the api hipMemRangeGetAttribute() for + hipMemRangeAttributeLastPrefetchLocation the value returned should be -2 + Scenario-13: Allocate HMM memory, set PreferredLocation to device 0, Prfetch + the mem to device1, probe for hipMemRangeAttributeLastPrefetchLocation using + hipMemRangeGetAttribute(), we should get 1 + Scenario-14: Allocate HMM memory, set ReadMostly followed by + PreferredLocation, probe for hipMemRangeAttributeReadMostly and + hipMemRangeAttributePreferredLocation + using hipMemRangeGetAttribute() we should observe 1 and 0 correspondingly. + In other words setting of hipMemRangeAttributePreferredLocation should not + impact hipMemRangeAttributeReadMostly advise to the memory + Scenario-15: Allocate Hmm memory, advise it to ReadMostly for gpu: 0 and + launch kernel on all other gpus except 0. This test case may discover any + effect or access denial case arising due to setting ReadMostly only to a + particular gpu +*/ + +#include +#if __linux__ +#include +#include +#include +#endif + +// Kernel function +__global__ void MemAdvseKernel(int n, int *x) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + if (index < n) + x[index] = x[index] * x[index]; +} + +// Kernel +__global__ void MemAdvise2(int *Hmm, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (int i = index; i < n; i += stride) { + Hmm[i] = Hmm[i] + 10; + } +} + +// Kernel +__global__ void MemAdvise3(int *Hmm, int *Hmm1, int n) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (int i = index; i < n; i += stride) { + Hmm1[i] = Hmm[i] + 10; + } +} + + +static bool CheckError(hipError_t err, int LineNo) { + if (err == hipSuccess) { + WARN("Error expected but received hipSuccess at line no.:" << LineNo); + return false; + } else { + return true; + } +} + +static int HmmAttrPrint() { + int managed = 0; + WARN("The following are the attribute values related to HMM for" + " device 0:\n"); + HIP_CHECK(hipDeviceGetAttribute(&managed, + hipDeviceAttributeDirectManagedMemAccessFromHost, 0)); + WARN("hipDeviceAttributeDirectManagedMemAccessFromHost: " << managed); + HIP_CHECK(hipDeviceGetAttribute(&managed, + hipDeviceAttributeConcurrentManagedAccess, 0)); + WARN("hipDeviceAttributeConcurrentManagedAccess: " << managed); + HIP_CHECK(hipDeviceGetAttribute(&managed, + hipDeviceAttributePageableMemoryAccess, 0)); + WARN("hipDeviceAttributePageableMemoryAccess: " << managed); + HIP_CHECK(hipDeviceGetAttribute(&managed, + hipDeviceAttributePageableMemoryAccessUsesHostPageTables, 0)); + WARN("hipDeviceAttributePageableMemoryAccessUsesHostPageTables:" << managed); + + HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, + 0)); + WARN("hipDeviceAttributeManagedMemory: " << managed); + return managed; +} + + +// The following Function Tests the working of flags which can be assigned +// to HMM memory using hipMemAdvise() api +TEST_CASE("Unit_hipMemAdvise_TstFlags") { + int MangdMem = HmmAttrPrint(); + if (MangdMem == 1) { + bool IfTestPassed = true; + int NumDevs = 0, *Outpt = nullptr; + int MEM_SIZE = 4*1024, A_CONST = 9999; + float *Hmm = nullptr; + int AttrVal = 0; + HIP_CHECK(hipGetDeviceCount(&NumDevs)); + Outpt = new int(NumDevs); + HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE * 2, hipMemAttachGlobal)); + // With the following for loop we iterate through each of the Gpus in the + // system set and unset the flags and check the behavior. + for (int i = 0; i < NumDevs; ++i) { + HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, hipMemAdviseSetReadMostly, i)); + HIP_CHECK(hipMemRangeGetAttribute(&AttrVal, sizeof(AttrVal), + hipMemRangeAttributeReadMostly, Hmm, + MEM_SIZE * 2)); + if (AttrVal != 1) { + WARN("Attempt to set hipMemAdviseSetReadMostly flag failed!\n"); + IfTestPassed = false; + } + HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, hipMemAdviseUnsetReadMostly, + i)); + + HIP_CHECK(hipMemRangeGetAttribute(&AttrVal, sizeof(AttrVal), + hipMemRangeAttributeReadMostly, Hmm, + (MEM_SIZE * 2))); + if (AttrVal != 0) { + WARN("Attempt to Unset hipMemAdviseSetReadMostly flag failed!\n"); + IfTestPassed = false; + } + AttrVal = A_CONST; + // Currently hipMemAdviseSetPreferredLocation and + // hipMemAdviseSetAccessedBy + // flags are resulting in issues: SWDEV-267357 + HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, + hipMemAdviseSetPreferredLocation, i)); + HIP_CHECK(hipMemRangeGetAttribute(&AttrVal, sizeof(AttrVal), + hipMemRangeAttributePreferredLocation, + Hmm, (MEM_SIZE * 2))); + if (AttrVal != i) { + WARN("Attempt to set hipMemAdviseSetPreferredLocation flag failed!\n"); + IfTestPassed = false; + } + AttrVal = A_CONST; + HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, + hipMemAdviseUnsetPreferredLocation, i)); + HIP_CHECK(hipMemRangeGetAttribute(&AttrVal, sizeof(AttrVal), + hipMemRangeAttributePreferredLocation, + Hmm, (MEM_SIZE * 2))); + if (AttrVal == i) { + WARN("Attempt to Unset hipMemAdviseUnsetPreferredLocation "); + WARN("flag failed!\n"); + IfTestPassed = false; + } + for (int m = 0; m < NumDevs; ++m) { + Outpt[m] = A_CONST; + } + HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, hipMemAdviseSetAccessedBy, i)); + HIP_CHECK(hipMemRangeGetAttribute(Outpt, sizeof(Outpt), + hipMemRangeAttributeAccessedBy, Hmm, + (MEM_SIZE * 2))); + if ((Outpt[0]) != i) { + WARN("Attempt to set hipMemAdviseSetAccessedBy flag failed!\n"); + IfTestPassed = false; + } + for (int m = 0; m < NumDevs; ++m) { + Outpt[m] = A_CONST; + } + HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, hipMemAdviseUnsetAccessedBy, + i)); + HIP_CHECK(hipMemRangeGetAttribute(Outpt, sizeof(Outpt), + hipMemRangeAttributeAccessedBy, Hmm, + (MEM_SIZE * 2))); + if ((Outpt[0]) >= 0) { + WARN("Attempt to Unset hipMemAdviseUnsetAccessedBy flag failed!\n"); + IfTestPassed = false; + } + } + delete [] Outpt; + HIP_CHECK(hipFree(Hmm)); + REQUIRE(IfTestPassed); + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + +TEST_CASE("Unit_hipMemAdvise_NegtveTsts") { + int MangdMem = HmmAttrPrint(); + if (MangdMem == 1) { + bool IfTestPassed = true; + int NumDevs = 0, MEM_SIZE = 4*1024; + float *Hmm = nullptr; + std::string str; + HIP_CHECK(hipGetDeviceCount(&NumDevs)); + HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE * 2, hipMemAttachGlobal)); +#if HT_AMD + // Passing invalid value(99) device param + IfTestPassed &= CheckError(hipMemAdvise(Hmm, MEM_SIZE * 2, + hipMemAdviseSetReadMostly, 99), __LINE__); + + // Passing invalid value(-12) device param + IfTestPassed &= CheckError(hipMemAdvise(Hmm, MEM_SIZE * 2, + hipMemAdviseSetReadMostly, -12), __LINE__); +#endif + // Passing NULL as first parameter instead of valid pointer to a memory + IfTestPassed &= CheckError(hipMemAdvise(NULL, MEM_SIZE * 2, + hipMemAdviseSetReadMostly, 0), __LINE__); + + // Passing 0 for count(2nd param) parameter + IfTestPassed &= CheckError(hipMemAdvise(Hmm, 0, hipMemAdviseSetReadMostly, + 0), __LINE__); + + // Passing count much more than actually allocated value + IfTestPassed &= CheckError(hipMemAdvise(Hmm, MEM_SIZE * 6, + hipMemAdviseSetReadMostly, 0), __LINE__); + + REQUIRE(IfTestPassed); + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + +// The following function tests various scenarios around the flag +// 'hipMemAdviseSetPreferredLocation' using HMM memory and hipMemAdvise() api +TEST_CASE("Unit_hipMemAdvise_PrefrdLoc") { + int MangdMem = HmmAttrPrint(); + if (MangdMem == 1) { + // Check that when a page fault occurs for the memory region set to devPtr, + // the data is migrated to the destn processor + int MEM_SIZE = 4096, A_CONST = 9999; + int *Hmm = nullptr, NumDevs = 0, dev = A_CONST; + bool IfTestPassed = true; + HIP_CHECK(hipGetDeviceCount(&NumDevs)); + HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE * 3, hipMemAttachGlobal)); + for (int i = 0; i < ((MEM_SIZE * 3)/4); ++i) { + Hmm[i] = 4; + } + for (int devId = 0; devId < NumDevs; ++devId) { + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE * 3, + hipMemAdviseSetPreferredLocation, devId)); + int NumElms = ((MEM_SIZE * 3)/4); + MemAdvseKernel<<>>(NumElms, Hmm); + int dev = A_CONST; + HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(dev), + hipMemRangeAttributePreferredLocation, + Hmm, MEM_SIZE * 3)); + if (dev != devId) { + WARN("Memory observed to be not available on expected location\n"); + WARN("line no: " << __LINE__); + WARN("dev: " << dev); + IfTestPassed = false; + } + } + + // Check that when preferred location is set for a memory region, + // data can still be prefetched using hipMemPrefetchAsync + hipStream_t strm; + dev = A_CONST; + for (int devId = 0; devId < NumDevs; ++devId) { + HIP_CHECK(hipSetDevice(devId)); + HIP_CHECK(hipStreamCreate(&strm)); + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE * 3, + hipMemAdviseSetPreferredLocation, devId)); + HIP_CHECK(hipMemPrefetchAsync(Hmm, MEM_SIZE * 3, devId, strm)); + HIP_CHECK(hipStreamSynchronize(strm)); + HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(dev), + hipMemRangeAttributeLastPrefetchLocation, + Hmm, MEM_SIZE * 3)); + if (dev != devId) { + WARN("Memory reported to be not available at the Prefetched "); + WARN("location with device id: " << devId); + WARN("line no: " << __LINE__); + WARN("dev: " << dev); + IfTestPassed = false; + } + HIP_CHECK(hipStreamDestroy(strm)); + } + HIP_CHECK(hipFree(Hmm)); + REQUIRE(IfTestPassed); + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + +// The following function tests various scenarios around the flag +// 'hipMemAdviseSetReadMostly' using HMM memory and hipMemAdvise() api + +TEST_CASE("Unit_hipMemAdvise_ReadMostly") { + int MangdMem = HmmAttrPrint(); + if (MangdMem == 1) { + bool IfTestPassed = true; + int MEM_SIZE = 4096, A_CONST = 9999; + float *Hmm = nullptr; + HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE)); + for (uint64_t i = 0; i < (MEM_SIZE/sizeof(float)); ++i) { + Hmm[i] = A_CONST; + } + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetReadMostly, 0)); + // Checking if the data can be read after setting hipMemAdviseSetReadMostly + for (uint64_t i = 0; i < (MEM_SIZE/sizeof(float)); ++i) { + if (Hmm[i] != A_CONST) { + WARN("Didn't find expected value in Hmm memory after setting"); + WARN(" hipMemAdviseSetReadMostly flag line no.: " << __LINE__); + IfTestPassed = false; + } + } + + // Checking if the memory region can be modified + for (uint64_t i = 0; i < (MEM_SIZE/sizeof(float)); ++i) { + Hmm[i] = A_CONST; + } + + for (uint64_t i = 0; i < (MEM_SIZE/sizeof(float)); ++i) { + if (Hmm[i] != A_CONST) { + WARN("Didn't find expected value in Hmm memory after Modification\n"); + WARN("line no.: " << __LINE__); + IfTestPassed = false; + } + } + + int out = A_CONST; + HIP_CHECK(hipMemRangeGetAttribute(&out, 4, hipMemRangeAttributeReadMostly, + Hmm, MEM_SIZE)); + if (out != 1) { + WARN("out value: " << out); + IfTestPassed = false; + } + // Checking the advise attribute after prefetch + HIP_CHECK(hipMemPrefetchAsync(Hmm, MEM_SIZE, 0, 0)); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipMemRangeGetAttribute(&out, sizeof(int), + hipMemRangeAttributeReadMostly, Hmm, + MEM_SIZE)); + if (out != 1) { + WARN("Attribute assigned to memory changed after calling "); + WARN("hipMemPrefetchAsync(). line no.: " << __LINE__); + WARN("out value: " << out); + IfTestPassed = false; + } + HIP_CHECK(hipFree(Hmm)); + REQUIRE(IfTestPassed); + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + +// The following function verifies if assigning of a flag invalidates the +// earlier flag which was assigned to the same memory region using +// hipMemAdvise() +TEST_CASE("Unit_hipMemAdvise_TstFlgOverrideEffect") { + int MangdMem = HmmAttrPrint(); + if (MangdMem == 1) { + bool IfTestPassed = true; + int MEM_SIZE = 4*4096, A_CONST = 9999; + float *Hmm = nullptr; + int NumDevs = 0, dev = A_CONST; + + HIP_CHECK(hipGetDeviceCount(&NumDevs)); + HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE, hipMemAttachGlobal)); + for (int i = 0; i < NumDevs; ++i) { + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetReadMostly, i)); + HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(int), + hipMemRangeAttributeReadMostly, Hmm, + MEM_SIZE)); + if (dev != 1) { + WARN("hipMemAdviseSetReadMostly flag did not take affect despite "); + WARN("setting it using hipMemAdvise(). line no.: " << __LINE__); + IfTestPassed = false; + break; + } + dev = A_CONST; + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetPreferredLocation, + i)); + HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(int), + hipMemRangeAttributePreferredLocation, + Hmm, MEM_SIZE)); + if (dev != i) { + WARN("hipMemAdviseSetPreferredLocation flag did not take affect "); + WARN("despite setting it using hipMemAdvise()\n"); + WARN("line no.: " << __LINE__); + IfTestPassed = false; + break; + } + + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetAccessedBy, i)); + dev = A_CONST; + HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(int), + hipMemRangeAttributeAccessedBy, Hmm, + MEM_SIZE)); + if (dev != i) { + WARN("hipMemAdviseSetAccessedBy flag did not take affect despite "); + WARN("setting it using hipMemAdvise(). line no.: " << __LINE__); + IfTestPassed = false; + break; + } + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseUnsetAccessedBy, i)); + } + HIP_CHECK(hipFree(Hmm)); + REQUIRE(IfTestPassed); + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + + +// The following function tests if peers can set hipMemAdviseSetAccessedBy flag +// on HMM memory prefetched on each of the other gpus +#if HT_AMD +TEST_CASE("Unit_hipMemAdvise_TstAccessedByPeer") { + int MangdMem = HmmAttrPrint(); + if (MangdMem == 1) { + bool IfTestPassed = true; + int *Hmm = nullptr, MEM_SIZE = 4*4096, A_CONST = 9999;; + int NumDevs = 0, CanAccessPeer = A_CONST, flag = 0; + + HIP_CHECK(hipGetDeviceCount(&NumDevs)); + if (NumDevs < 2) { + SUCCEED("Test TestSetAccessedByPeer() need atleast two Gpus to test" + " the scenario. This system has GPUs less than 2"); + } + HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE, hipMemAttachGlobal)); + for (int i = 0; i < NumDevs; ++i) { + HIP_CHECK(hipMemPrefetchAsync(Hmm, MEM_SIZE, i, 0)); + for (int j = 0; j < NumDevs; ++j) { + if (i == j) + continue; + HIP_CHECK(hipSetDevice(j)); + HIP_CHECK(hipDeviceCanAccessPeer(&CanAccessPeer, j, i)); + if (CanAccessPeer) { + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetAccessedBy, j)); + for (uint64_t m = 0; m < (MEM_SIZE/sizeof(int)); ++m) { + Hmm[m] = 4; + } + HIP_CHECK(hipDeviceEnablePeerAccess(i, 0)); + MemAdvseKernel<<<(MEM_SIZE/sizeof(int)/32), 32>>>( + (MEM_SIZE/sizeof(int)), Hmm); + HIP_CHECK(hipDeviceSynchronize()); + // Verifying the result + for (uint64_t m = 0; m < (MEM_SIZE/sizeof(int)); ++m) { + if (Hmm[m] != 16) { + flag = 1; + } + } + if (flag) { + WARN("Didnt get Expected results with device: " << j); + WARN("line no.: " << __LINE__); + IfTestPassed = false; + flag = 0; + } + } + } + } + HIP_CHECK(hipFree(Hmm)); + REQUIRE(IfTestPassed); + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} +#endif + + +/* Set AccessedBy flag and check value returned by hipMemRangeGetAttribute() + It should be -2(same is observed on cuda)*/ +TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, data = 999; + HIP_CHECK(hipMallocManaged(&Hmm, 2*4096)); + HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetAccessedBy, 0)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), + hipMemRangeAttributeLastPrefetchLocation, + Hmm, 2*4096)); + if (data != -2) { + WARN("Didnt get expected value!!\n"); + REQUIRE(false); + } + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + +/* Set AccessedBy flag to device 0 on Hmm memory and prefetch the memory to + device 1, then probe for AccessedBy flag using hipMemRangeGetAttribute() + we should still see the said flag is set for device 0*/ +TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg2") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, data = 999, Ngpus = 0; + HIP_CHECK(hipGetDeviceCount(&Ngpus)); + if (Ngpus >= 2) { + hipStream_t strm; + HIP_CHECK(hipStreamCreate(&strm)); + HIP_CHECK(hipMallocManaged(&Hmm, 2*4096)); + HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetAccessedBy, 0)); + HIP_CHECK(hipMemPrefetchAsync(Hmm, 2*4096, 1, strm)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), + hipMemRangeAttributeAccessedBy, Hmm, 2*4096)); + if (data != 0) { + WARN("Didnt get expected behavior at line: " << __LINE__); + REQUIRE(false); + } + HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseUnsetAccessedBy, 0)); + HIP_CHECK(hipStreamDestroy(strm)); + HIP_CHECK(hipFree(Hmm)); + } + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + + + +/* 1) Set AccessedBy to device 0 followed by PreferredLocation to device 1 + check for AccessedBy flag using hipMemRangeGetAttribute() it should + return 0 + 2) Unset AccessedBy to 0 and set it to device 1 followed by + PreferredLocation to device 1, check for AccessedBy flag using + hipMemRangeGetAttribute() it should return 1*/ + +TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg3") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, data = 999, Ngpus = 0; + HIP_CHECK(hipGetDeviceCount(&Ngpus)); + if (Ngpus >= 2) { + HIP_CHECK(hipMallocManaged(&Hmm, 2*4096)); + HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetAccessedBy, 0)); + HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetPreferredLocation, 1)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), + hipMemRangeAttributeAccessedBy, Hmm, 2*4096)); + if (data != 0) { + WARN("Didnt get expected behavior at line: " << __LINE__); + REQUIRE(false); + } + HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseUnsetAccessedBy, 0)); + HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetAccessedBy, 1)); + HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetPreferredLocation, 0)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), + hipMemRangeAttributeAccessedBy, Hmm, 2*4096)); + if (data != 1) { + WARN("Didnt get expected behavior at line: " << __LINE__); + REQUIRE(false); + } + HIP_CHECK(hipFree(Hmm)); + } + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + + +/* Set AccessedBy flag to HMM memory launch a kernel and then unset + AccessedBy, launch kernel. We should not have any access issues*/ + +TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg4") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, NumElms = (1024 * 1024), InitVal = 123, blockSize = 64; + int DataMismatch = 0; + hipStream_t strm; + HIP_CHECK(hipStreamCreate(&strm)); + HIP_CHECK(hipMallocManaged(&Hmm, (NumElms * sizeof(int)))); + HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), + hipMemAdviseSetAccessedBy, 0)); + // Initializing memory + for (int i = 0; i < NumElms; ++i) { + Hmm[i] = InitVal; + } + dim3 dimBlock(blockSize, 1, 1); + dim3 dimGrid((NumElms + blockSize -1)/blockSize, 1, 1); + // launching kernel from each one of the gpus + MemAdvise2<<>>(Hmm, NumElms); + HIP_CHECK(hipStreamSynchronize(strm)); + + // verifying the final result + for (int i = 0; i < NumElms; ++i) { + if (Hmm[i] != (InitVal + 10)) { + DataMismatch++; + } + } + + if (DataMismatch != 0) { + WARN("DataMismatch is observed at line: " << __LINE__); + REQUIRE(false); + } + + HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), + hipMemAdviseUnsetAccessedBy, 0)); + MemAdvise2<<>>(Hmm, NumElms); + HIP_CHECK(hipStreamSynchronize(strm)); + // verifying the final result + for (int i = 0; i < NumElms; ++i) { + if (Hmm[i] != (InitVal + (2*10))) { + DataMismatch++; + } + } + + if (DataMismatch != 0) { + WARN("DataMismatch is observed at line: " << __LINE__); + REQUIRE(false); + } + HIP_CHECK(hipFree(Hmm)); + HIP_CHECK(hipStreamDestroy(strm)); + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + + +/* Allocate memory using aligned_alloc(), assign PreferredLocation flag to + the allocated memory and launch a kernel. Kernel should get executed + successfully without hang or segfault*/ +#if __linux__ && HT_AMD +TEST_CASE("Unit_hipMemAdvise_TstAlignedAllocMem") { + if ((setenv("HSA_XNACK", "1", 1)) != 0) { + WARN("Unable to turn on HSA_XNACK, hence terminating the Test case!"); + REQUIRE(false); + } + // The following code block is used to check for gfx906/8 so as to skip if + // any of the gpus available + int fd1[2]; // Used to store two ends of first pipe + pid_t p; + if (pipe(fd1) == -1) { + fprintf(stderr, "Pipe Failed"); + REQUIRE(false); + } + + /* GpuId[0] for gfx906 exists--> 1 for yes and 0 for no + GpuId[0] for gfx908 exists--> 1 for yes and 0 for no*/ + int GpuId[2] = {0, 0}; + p = fork(); + + if (p < 0) { + fprintf(stderr, "fork Failed"); + REQUIRE(false); + } else if (p > 0) { // parent process + close(fd1[1]); // Close writing end of first pipe + // Wait for child to send a string + wait(NULL); + // Read string from child and close reading end. + read(fd1[0], GpuId, 2 * sizeof(int)); + close(fd1[0]); + if ((GpuId[0] == 1) || (GpuId[0] == 1)) { + WARN("This test is not applicable on MI60 & MI100." + "Skipping the test!!"); + exit(0); + } + } else { // child process + close(fd1[0]); // Close read end of first pipe + hipDeviceProp_t prop; + HIPCHECK(hipGetDeviceProperties(&prop, 0)); + char *p = NULL; + p = strstr(prop.gcnArchName, "gfx906"); + if (p) { + WARN("gfx906 gpu found on this system!!"); + GpuId[0] = 1; + } + p = strstr(prop.gcnArchName, "gfx908"); + if (p) { + WARN("gfx908 gpu found on this system!!"); + GpuId[1] = 1; + } + // Write concatenated string and close writing end + write(fd1[1], GpuId, 2 * sizeof(int)); + close(fd1[1]); + exit(0); + } + int stat = 0; + if (fork() == 0) { + // The below part should be inside fork + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Mllc = nullptr, MemSz = 4096 * 4, NumElms = 4096, InitVal = 123; + // Mllc = reinterpret_cast<(int *)>(aligned_alloc(4096, MemSz)); + Mllc = reinterpret_cast(aligned_alloc(4096, 4096*4)); + for (int i = 0; i < NumElms; ++i) { + Mllc[i] = InitVal; + } + hipStream_t strm; + int DataMismatch = 0; + HIP_CHECK(hipStreamCreate(&strm)); + // The following hipMemAdvise() call is made to know if advise on + // aligned_alloc() is causing any issue + HIP_CHECK(hipMemAdvise(Mllc, MemSz, hipMemAdviseSetPreferredLocation, 0)); + HIP_CHECK(hipMemPrefetchAsync(Mllc, MemSz, 0, strm)); + HIP_CHECK(hipStreamSynchronize(strm)); + MemAdvise2<<<(NumElms/32), 32, 0, strm>>>(Mllc, NumElms); + HIP_CHECK(hipStreamSynchronize(strm)); + for (int i = 0; i < NumElms; ++i) { + if (Mllc[i] != (InitVal + 10)) { + DataMismatch++; + } + } + if (DataMismatch != 0) { + WARN("DataMismatch observed!!"); + exit(9); // 9 for failure + } else { + exit(10); // 10 for Pass result + } + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } + } else { + wait(&stat); + int Result = WEXITSTATUS(stat); + if (Result != 10) { + REQUIRE(false); + } + } +} +#endif + +/* Allocate Hmm memory, set advise to PreferredLocation and then get + attribute using the api hipMemRangeGetAttribute() for + hipMemRangeAttributeLastPrefetchLocation the value returned should be -2*/ + +TEST_CASE("Unit_hipMemAdvise_TstMemAdvisePrefrdLoc") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, data = 999; + HIP_CHECK(hipMallocManaged(&Hmm, 4096)); + HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetPreferredLocation, 0)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), + hipMemRangeAttributeLastPrefetchLocation, + Hmm, 4096)); + if (data != -2) { + WARN("Didnt receive expected value."); + REQUIRE(false); + } + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributePageableMemoryAccess " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + + +/* Allocate HMM memory, set PreferredLocation to device 0, Prfetch the mem + to device1, probe for hipMemRangeAttributeLastPrefetchLocation using + hipMemRangeGetAttribute(), we should get 1*/ + +TEST_CASE("Unit_hipMemAdvise_TstMemAdviseLstPreftchLoc") { + int NumDevs = 0; + HIP_CHECK(hipGetDeviceCount(&NumDevs)); + if (NumDevs >= 2) { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, data = 999; + hipStream_t strm; + HIP_CHECK(hipSetDevice(1)); + HIP_CHECK(hipStreamCreate(&strm)); + HIP_CHECK(hipMallocManaged(&Hmm, 4096)); + HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetPreferredLocation, 0)); + HIP_CHECK(hipMemPrefetchAsync(Hmm, 4096, 1, strm)); + HIP_CHECK(hipStreamSynchronize(strm)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), + hipMemRangeAttributeLastPrefetchLocation, + Hmm, 4096)); + if (data != 1) { + WARN("Didnt receive expected value!!"); + REQUIRE(false); + } + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributePageableMemoryAccess " + "attribute. Hence skipping the testing with Pass result.\n"); + } + } else { + SUCCEED("This system has less than 2 gpus hence skipping the test.\n"); + } +} + + +/* Allocate HMM memory, set ReadMostly followed by PreferredLocation, probe + for hipMemRangeAttributeReadMostly and hipMemRangeAttributePreferredLocation + using hipMemRangeGetAttribute() we should observe 1 and 0 correspondingly. + In other words setting of hipMemRangeAttributePreferredLocation should not + impact hipMemRangeAttributeReadMostly advise to the memory*/ + +TEST_CASE("Unit_hipMemAdvise_TstMemAdviseMultiFlag") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, data = 999; + HIP_CHECK(hipMallocManaged(&Hmm, 4096)); + HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetReadMostly, 0)); + HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetPreferredLocation, 0)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), + hipMemRangeAttributeReadMostly, Hmm, + 4096)); + if (data != 1) { + WARN("Didnt receive expected value at line: " << data); + REQUIRE(false); + } + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), + hipMemRangeAttributePreferredLocation, Hmm, + 4096)); + if (data != 0) { + WARN("Didnt receive expected value at line: " << data); + REQUIRE(false); + } + HIP_CHECK(hipFree(Hmm)); + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributePageableMemoryAccess " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + + + +/*Allocate Hmm memory, advise it to ReadMostly for gpu: 0 and launch kernel + on all other gpus except 0. This test case may discover any effect or + access denial case arising due to setting ReadMostly only to a particular + gpu*/ + +TEST_CASE("Unit_hipMemAdvise_ReadMosltyMgpuTst") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int Ngpus = 0; + HIP_CHECK(hipGetDeviceCount(&Ngpus)); + if (Ngpus < 2) { + SUCCEED("This test needs atleast two gpus to run." + "Hence skipping the test.\n"); + } + int *Hmm = NULL, NumElms = (1024 * 1024), InitVal = 123, blockSize = 64; + int *Hmm1 = NULL, DataMismatch = 0; + hipStream_t strm; + HIP_CHECK(hipStreamCreate(&strm)); + HIP_CHECK(hipMallocManaged(&Hmm, (NumElms * sizeof(int)))); + // Initializing memory + for (int i = 0; i < NumElms; ++i) { + Hmm[i] = InitVal; + } + HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), + hipMemAdviseSetReadMostly, 0)); + dim3 dimBlock(blockSize, 1, 1); + dim3 dimGrid((NumElms + blockSize -1)/blockSize, 1, 1); +#if HT_AMD + SECTION("Launch Kernel on all other gpus") { + // launching kernel from each one of the gpus + for (int i = 1; i < Ngpus; ++i) { + DataMismatch = 0; + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipMallocManaged(&Hmm1, (NumElms * sizeof(int)))); + MemAdvise3<<>>(Hmm, Hmm1, NumElms); + HIP_CHECK(hipStreamSynchronize(strm)); + // verifying the results + for (int j = 0; j < NumElms; ++j) { + if (Hmm1[j] != (InitVal + 10)) { + DataMismatch++; + } + } + if (DataMismatch != 0) { + WARN("DataMismatch is observed with the gpu: " << i); + REQUIRE(false); + } + HIP_CHECK(hipFree(Hmm1)); + } + } + + SECTION("Launch Kernel on all other gpus and manipulate the content") { + for (int i = 0; i < Ngpus; ++i) { + DataMismatch = 0; + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), + hipMemAdviseSetReadMostly, i)); + MemAdvise2<<>>(Hmm, NumElms); + HIP_CHECK(hipStreamSynchronize(strm)); + } + // verifying the final result + for (int i = 0; i < NumElms; ++i) { + if (Hmm[i] != (InitVal + Ngpus * 10)) { + DataMismatch++; + } + } + + if (DataMismatch != 0) { + WARN("DataMismatch is observed at line: " << __LINE__); + REQUIRE(false); + } + } +#endif + HIP_CHECK(hipFree(Hmm)); + HIP_CHECK(hipStreamDestroy(strm)); + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributePageableMemoryAccess " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + + +TEST_CASE("Unit_hipMemAdvise_TstSetUnsetPrfrdLoc") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, data = 999; + HIP_CHECK(hipMallocManaged(&Hmm, 2*4096)); + HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetPreferredLocation, 0)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), + hipMemRangeAttributePreferredLocation, Hmm, 2*4096)); + if (data != 0) { + WARN("Didnt receive expected value!!"); + REQUIRE(false); + } + HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseUnsetPreferredLocation, 0)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), + hipMemRangeAttributePreferredLocation, Hmm, 2*4096)); + if (data != -2) { + WARN("Didnt receive expected value!!"); + REQUIRE(false); + } + } else { + SUCCEED("GPU 0 doesn't support hipDeviceAttributePageableMemoryAccess " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + From 5a1878ce5d3ddedd218a48af5dcaae7ce98d6132 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Tue, 4 Oct 2022 11:24:41 +0200 Subject: [PATCH 05/24] EXSWHTEC-83 - Correct bug in Flags_Do_Not_Cause_Prefetch, and minor code corectness touchups --- tests/catch/unit/memory/hipMemAdvise.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/catch/unit/memory/hipMemAdvise.cc b/tests/catch/unit/memory/hipMemAdvise.cc index 30bd744e45..233cc03eee 100644 --- a/tests/catch/unit/memory/hipMemAdvise.cc +++ b/tests/catch/unit/memory/hipMemAdvise.cc @@ -74,7 +74,7 @@ TEST_CASE("Unit_hipMemAdvise_Set_Unset_Basic") { const auto SetUnset = [=](const hipMemoryAdvise advice) { LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); - int32_t attribute = 0u; + int32_t attribute = 0; HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, advice, device)); HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), GetMemAdviceAttr(advice), alloc.ptr(), kPageSize)); @@ -110,7 +110,7 @@ TEST_CASE("Unit_hipMemAdvise_No_Flag_Interference") { } for (const auto a : advice) { - auto attribute = 0u; + int32_t attribute = 0; HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), GetMemAdviceAttr(a), alloc.ptr(), kPageSize)); REQUIRE((a == hipMemAdviseSetReadMostly ? 1 : device) == attribute); @@ -158,8 +158,8 @@ TEST_CASE("Unit_hipMemAdvise_Flags_Do_Not_Cause_Prefetch") { const auto Test = [](const int device, const hipMemoryAdvise advice) { LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); - HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, hipMemAdviseSetPreferredLocation, device)); - int32_t attribute = 0u; + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, advice, device)); + int32_t attribute = 0; HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), hipMemRangeAttributeLastPrefetchLocation, alloc.ptr(), kPageSize)); @@ -193,7 +193,7 @@ TEST_CASE("Unit_hipMemAdvise_Read_Write_After_Advise") { ArrayFindIfNot(alloc.ptr(), i, count); } - int32_t attribute = 0u; + int32_t attribute = 0; HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), GetMemAdviceAttr(advice), alloc.ptr(), kPageSize)); REQUIRE((advice == hipMemAdviseSetReadMostly ? 1 : device) == attribute); From 56b598280215c7f895be5c1e2eab84aae8892447 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Tue, 4 Oct 2022 14:44:21 +0200 Subject: [PATCH 06/24] EXSWHTEC-83 - Implement additional negative parameter tests --- tests/catch/unit/memory/hipMemAdvise.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/catch/unit/memory/hipMemAdvise.cc b/tests/catch/unit/memory/hipMemAdvise.cc index 233cc03eee..37181e36c1 100644 --- a/tests/catch/unit/memory/hipMemAdvise.cc +++ b/tests/catch/unit/memory/hipMemAdvise.cc @@ -270,6 +270,12 @@ TEST_CASE("Unit_hipMemAdvise_Negative_Parameters") { } const auto advice = GENERATE(hipMemAdviseSetAccessedBy, hipMemAdviseSetReadMostly, hipMemAdviseSetPreferredLocation); + SECTION("count == 0") { + HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), 0, advice, device), hipErrorInvalidValue); + } + SECTION("count larger than allocation size") { + HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), kPageSize + 1, advice, device), hipErrorInvalidValue); + } SECTION("dev_ptr == nullptr") { HIP_CHECK_ERROR(hipMemAdvise(nullptr, kPageSize, advice, device), hipErrorInvalidValue); } From 8a0e740cfe7eb72519346902a4ee1c3f0fc44e03 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Tue, 4 Oct 2022 16:13:23 +0200 Subject: [PATCH 07/24] Add error check after launching delay kernel --- tests/catch/include/utils.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/catch/include/utils.hh b/tests/catch/include/utils.hh index 1448c4f768..2aa3ffa066 100644 --- a/tests/catch/include/utils.hh +++ b/tests/catch/include/utils.hh @@ -84,6 +84,7 @@ inline void LaunchDelayKernel(const std::chrono::milliseconds interval, const hi // Clock rate is in kHz => number of clock ticks in a millisecond HIP_CHECK(hipDeviceGetAttribute(&ticks_per_ms, hipDeviceAttributeClockRate, 0)); Delay<<<1, 1, 0, stream>>>(interval.count(), ticks_per_ms); + HIP_CHECK(hipGetLastError()); } template From e534d2e050446f4e936c0de76fc81b5858782654 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Thu, 6 Oct 2022 15:47:34 +0200 Subject: [PATCH 08/24] EXSWHTEC-94 - Implement helper classes and functions for memory tests --- tests/catch/include/resource_guards.hh | 144 +++++++++++++++++++++++++ tests/catch/include/utils.hh | 102 ++++++++++++++++++ 2 files changed, 246 insertions(+) create mode 100644 tests/catch/include/resource_guards.hh create mode 100644 tests/catch/include/utils.hh diff --git a/tests/catch/include/resource_guards.hh b/tests/catch/include/resource_guards.hh new file mode 100644 index 0000000000..f8d1688312 --- /dev/null +++ b/tests/catch/include/resource_guards.hh @@ -0,0 +1,144 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include + +enum class LinearAllocs { + malloc, + mallocAndRegister, + hipHostMalloc, + hipMalloc, + hipMallocManaged, +}; + +template class LinearAllocGuard { + public: + LinearAllocGuard(const LinearAllocs allocation_type, const size_t size, + const unsigned int flags = 0u) + : allocation_type_{allocation_type} { + switch (allocation_type_) { + case LinearAllocs::malloc: + ptr_ = host_ptr_ = reinterpret_cast(malloc(size)); + break; + case LinearAllocs::mallocAndRegister: + host_ptr_ = reinterpret_cast(malloc(size)); + HIP_CHECK(hipHostRegister(host_ptr_, size, flags)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&ptr_), host_ptr_, 0u)); + break; + case LinearAllocs::hipHostMalloc: + HIP_CHECK(hipHostMalloc(reinterpret_cast(&ptr_), size, flags)); + host_ptr_ = ptr_; + break; + case LinearAllocs::hipMalloc: + HIP_CHECK(hipMalloc(reinterpret_cast(&ptr_), size)); + break; + case LinearAllocs::hipMallocManaged: + HIP_CHECK(hipMallocManaged(reinterpret_cast(&ptr_), size, flags ? flags : 1u)); + host_ptr_ = ptr_; + } + } + + LinearAllocGuard(const LinearAllocGuard&) = delete; + LinearAllocGuard(LinearAllocGuard&&) = delete; + + ~LinearAllocGuard() { + // No Catch macros, don't want to possibly throw in the destructor + switch (allocation_type_) { + case LinearAllocs::malloc: + free(ptr_); + break; + case LinearAllocs::mallocAndRegister: + // Cast to void to suppress nodiscard warnings + static_cast(hipHostUnregister(host_ptr_)); + free(host_ptr_); + break; + case LinearAllocs::hipHostMalloc: + static_cast(hipHostFree(ptr_)); + break; + case LinearAllocs::hipMalloc: + case LinearAllocs::hipMallocManaged: + static_cast(hipFree(ptr_)); + } + } + + T* ptr() { return ptr_; }; + T* const ptr() const { return ptr_; }; + T* host_ptr() { return host_ptr_; } + T* const host_ptr() const { return host_ptr(); } + + private: + const LinearAllocs allocation_type_; + T* ptr_ = nullptr; + T* host_ptr_ = nullptr; +}; + +enum class Streams { nullstream, perThread, created }; + +class StreamGuard { + public: + StreamGuard(const Streams stream_type) : stream_type_{stream_type} { + switch (stream_type_) { + case Streams::nullstream: + stream_ = nullptr; + break; + case Streams::perThread: + stream_ = hipStreamPerThread; + break; + case Streams::created: + HIP_CHECK(hipStreamCreate(&stream_)); + } + } + + StreamGuard(const StreamGuard&) = delete; + StreamGuard(StreamGuard&&) = delete; + + ~StreamGuard() { + if (stream_type_ == Streams::created) { + static_cast(hipStreamDestroy(stream_)); + } + } + + hipStream_t stream() const { return stream_; } + + private: + const Streams stream_type_; + hipStream_t stream_; +}; + +inline unsigned int GenerateLinearAllocationFlagCombinations(const LinearAllocs allocation_type) { + switch (allocation_type) { + case LinearAllocs::mallocAndRegister: + // TODO + return 0; + case LinearAllocs::hipHostMalloc: + return GENERATE(hipHostMallocDefault, hipHostMallocPortable, hipHostMallocMapped, + hipHostMallocWriteCombined); + case LinearAllocs::hipMallocManaged: + // TODO + return 1u; + case LinearAllocs::malloc: + case LinearAllocs::hipMalloc: + return 0u; + default: + assert("Invalid LinearAllocs enumerator"); + } +} \ No newline at end of file diff --git a/tests/catch/include/utils.hh b/tests/catch/include/utils.hh new file mode 100644 index 0000000000..9edffc6f7c --- /dev/null +++ b/tests/catch/include/utils.hh @@ -0,0 +1,102 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include + +#include +#include + +namespace { +inline constexpr size_t kPageSize = 4096; +} // anonymous namespace + +template +void ArrayMismatch(T* const expected, T* const actual, const size_t num_elements) { + const auto ret = std::mismatch(expected, expected + num_elements, actual); + if (ret.first != expected + num_elements) { + const auto idx = std::distance(expected, ret.first); + INFO("Value mismatch at index: " << idx); + REQUIRE(expected[idx] == actual[idx]); + } +} + +template void ArrayFindIfNot(It begin, It end, const T expected_value) { + const auto it = std::find_if_not( + begin, end, [expected_value](const int elem) { return expected_value == elem; }); + + if (it != end) { + const auto idx = std::distance(begin, it); + INFO("Value mismatch at index " << idx); + REQUIRE(expected_value == *it); + } +} + +template +void ArrayFindIfNot(T* const array, const T expected_value, const size_t num_elements) { + ArrayFindIfNot(array, array + num_elements, expected_value); +} + +template +__global__ void VectorIncrement(T* const vec, const T increment_value, size_t N) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N; i += stride) { + vec[i] += increment_value; + } +} + +template __global__ void VectorSet(T* const vec, const T value, size_t N) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N; i += stride) { + vec[i] = value; + } +} + +// Will execute for atleast interval milliseconds +static __global__ void Delay(uint32_t interval, const uint32_t ticks_per_ms) { + while (interval--) { + uint64_t start = clock(); + while (clock() - start < ticks_per_ms) { + } + } +} + +inline void LaunchDelayKernel(const std::chrono::milliseconds interval, const hipStream_t stream) { + int ticks_per_ms = 0; + // Clock rate is in kHz => number of clock ticks in a millisecond + HIP_CHECK(hipDeviceGetAttribute(&ticks_per_ms, hipDeviceAttributeClockRate, 0)); + Delay<<<1, 1, 0, stream>>>(interval.count(), ticks_per_ms); + HIP_CHECK(hipGetLastError()); +} + +template +inline bool DeviceAttributesSupport(const int device, Attributes... attributes) { + constexpr auto DeviceAttributeSupport = [](const int device, + const hipDeviceAttribute_t attribute) { + int value = 0; + HIP_CHECK(hipDeviceGetAttribute(&value, attribute, device)); + return value; + }; + return (... && DeviceAttributeSupport(device, attributes)); +} \ No newline at end of file From 09ce86ac14450d712725a7e10377072c8b9050f0 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Thu, 6 Oct 2022 16:19:21 +0200 Subject: [PATCH 09/24] EXSWHTEC-94 - Remove c++14 standard constraint on memory tests --- tests/catch/unit/memory/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/catch/unit/memory/CMakeLists.txt b/tests/catch/unit/memory/CMakeLists.txt index f24c63ad8c..54e7708556 100644 --- a/tests/catch/unit/memory/CMakeLists.txt +++ b/tests/catch/unit/memory/CMakeLists.txt @@ -183,5 +183,4 @@ endif() hip_add_exe_to_target(NAME MemoryTest TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests - COMPILE_OPTIONS -std=c++14) + TEST_TARGET_NAME build_tests) From 48b337f4363897a27649960dd9a3523190ccac1b Mon Sep 17 00:00:00 2001 From: Dino Music Date: Thu, 6 Oct 2022 12:09:41 -0400 Subject: [PATCH 10/24] EXSWHTEC-94 - Remove GenerateLinearAllocationFlagCombinations until finished --- tests/catch/include/resource_guards.hh | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/tests/catch/include/resource_guards.hh b/tests/catch/include/resource_guards.hh index f8d1688312..7e6179c81a 100644 --- a/tests/catch/include/resource_guards.hh +++ b/tests/catch/include/resource_guards.hh @@ -122,23 +122,4 @@ class StreamGuard { private: const Streams stream_type_; hipStream_t stream_; -}; - -inline unsigned int GenerateLinearAllocationFlagCombinations(const LinearAllocs allocation_type) { - switch (allocation_type) { - case LinearAllocs::mallocAndRegister: - // TODO - return 0; - case LinearAllocs::hipHostMalloc: - return GENERATE(hipHostMallocDefault, hipHostMallocPortable, hipHostMallocMapped, - hipHostMallocWriteCombined); - case LinearAllocs::hipMallocManaged: - // TODO - return 1u; - case LinearAllocs::malloc: - case LinearAllocs::hipMalloc: - return 0u; - default: - assert("Invalid LinearAllocs enumerator"); - } -} \ No newline at end of file +}; \ No newline at end of file From 715cf30e7715bea49d4137b4284556a10df7e9c1 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Thu, 6 Oct 2022 15:47:34 +0200 Subject: [PATCH 11/24] EXSWHTEC-94 - Implement helper classes and functions for memory tests --- tests/catch/include/resource_guards.hh | 144 +++++++++++++++++++++++++ tests/catch/include/utils.hh | 102 ++++++++++++++++++ 2 files changed, 246 insertions(+) create mode 100644 tests/catch/include/resource_guards.hh create mode 100644 tests/catch/include/utils.hh diff --git a/tests/catch/include/resource_guards.hh b/tests/catch/include/resource_guards.hh new file mode 100644 index 0000000000..f8d1688312 --- /dev/null +++ b/tests/catch/include/resource_guards.hh @@ -0,0 +1,144 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include + +enum class LinearAllocs { + malloc, + mallocAndRegister, + hipHostMalloc, + hipMalloc, + hipMallocManaged, +}; + +template class LinearAllocGuard { + public: + LinearAllocGuard(const LinearAllocs allocation_type, const size_t size, + const unsigned int flags = 0u) + : allocation_type_{allocation_type} { + switch (allocation_type_) { + case LinearAllocs::malloc: + ptr_ = host_ptr_ = reinterpret_cast(malloc(size)); + break; + case LinearAllocs::mallocAndRegister: + host_ptr_ = reinterpret_cast(malloc(size)); + HIP_CHECK(hipHostRegister(host_ptr_, size, flags)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&ptr_), host_ptr_, 0u)); + break; + case LinearAllocs::hipHostMalloc: + HIP_CHECK(hipHostMalloc(reinterpret_cast(&ptr_), size, flags)); + host_ptr_ = ptr_; + break; + case LinearAllocs::hipMalloc: + HIP_CHECK(hipMalloc(reinterpret_cast(&ptr_), size)); + break; + case LinearAllocs::hipMallocManaged: + HIP_CHECK(hipMallocManaged(reinterpret_cast(&ptr_), size, flags ? flags : 1u)); + host_ptr_ = ptr_; + } + } + + LinearAllocGuard(const LinearAllocGuard&) = delete; + LinearAllocGuard(LinearAllocGuard&&) = delete; + + ~LinearAllocGuard() { + // No Catch macros, don't want to possibly throw in the destructor + switch (allocation_type_) { + case LinearAllocs::malloc: + free(ptr_); + break; + case LinearAllocs::mallocAndRegister: + // Cast to void to suppress nodiscard warnings + static_cast(hipHostUnregister(host_ptr_)); + free(host_ptr_); + break; + case LinearAllocs::hipHostMalloc: + static_cast(hipHostFree(ptr_)); + break; + case LinearAllocs::hipMalloc: + case LinearAllocs::hipMallocManaged: + static_cast(hipFree(ptr_)); + } + } + + T* ptr() { return ptr_; }; + T* const ptr() const { return ptr_; }; + T* host_ptr() { return host_ptr_; } + T* const host_ptr() const { return host_ptr(); } + + private: + const LinearAllocs allocation_type_; + T* ptr_ = nullptr; + T* host_ptr_ = nullptr; +}; + +enum class Streams { nullstream, perThread, created }; + +class StreamGuard { + public: + StreamGuard(const Streams stream_type) : stream_type_{stream_type} { + switch (stream_type_) { + case Streams::nullstream: + stream_ = nullptr; + break; + case Streams::perThread: + stream_ = hipStreamPerThread; + break; + case Streams::created: + HIP_CHECK(hipStreamCreate(&stream_)); + } + } + + StreamGuard(const StreamGuard&) = delete; + StreamGuard(StreamGuard&&) = delete; + + ~StreamGuard() { + if (stream_type_ == Streams::created) { + static_cast(hipStreamDestroy(stream_)); + } + } + + hipStream_t stream() const { return stream_; } + + private: + const Streams stream_type_; + hipStream_t stream_; +}; + +inline unsigned int GenerateLinearAllocationFlagCombinations(const LinearAllocs allocation_type) { + switch (allocation_type) { + case LinearAllocs::mallocAndRegister: + // TODO + return 0; + case LinearAllocs::hipHostMalloc: + return GENERATE(hipHostMallocDefault, hipHostMallocPortable, hipHostMallocMapped, + hipHostMallocWriteCombined); + case LinearAllocs::hipMallocManaged: + // TODO + return 1u; + case LinearAllocs::malloc: + case LinearAllocs::hipMalloc: + return 0u; + default: + assert("Invalid LinearAllocs enumerator"); + } +} \ No newline at end of file diff --git a/tests/catch/include/utils.hh b/tests/catch/include/utils.hh new file mode 100644 index 0000000000..9edffc6f7c --- /dev/null +++ b/tests/catch/include/utils.hh @@ -0,0 +1,102 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include + +#include +#include + +namespace { +inline constexpr size_t kPageSize = 4096; +} // anonymous namespace + +template +void ArrayMismatch(T* const expected, T* const actual, const size_t num_elements) { + const auto ret = std::mismatch(expected, expected + num_elements, actual); + if (ret.first != expected + num_elements) { + const auto idx = std::distance(expected, ret.first); + INFO("Value mismatch at index: " << idx); + REQUIRE(expected[idx] == actual[idx]); + } +} + +template void ArrayFindIfNot(It begin, It end, const T expected_value) { + const auto it = std::find_if_not( + begin, end, [expected_value](const int elem) { return expected_value == elem; }); + + if (it != end) { + const auto idx = std::distance(begin, it); + INFO("Value mismatch at index " << idx); + REQUIRE(expected_value == *it); + } +} + +template +void ArrayFindIfNot(T* const array, const T expected_value, const size_t num_elements) { + ArrayFindIfNot(array, array + num_elements, expected_value); +} + +template +__global__ void VectorIncrement(T* const vec, const T increment_value, size_t N) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N; i += stride) { + vec[i] += increment_value; + } +} + +template __global__ void VectorSet(T* const vec, const T value, size_t N) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N; i += stride) { + vec[i] = value; + } +} + +// Will execute for atleast interval milliseconds +static __global__ void Delay(uint32_t interval, const uint32_t ticks_per_ms) { + while (interval--) { + uint64_t start = clock(); + while (clock() - start < ticks_per_ms) { + } + } +} + +inline void LaunchDelayKernel(const std::chrono::milliseconds interval, const hipStream_t stream) { + int ticks_per_ms = 0; + // Clock rate is in kHz => number of clock ticks in a millisecond + HIP_CHECK(hipDeviceGetAttribute(&ticks_per_ms, hipDeviceAttributeClockRate, 0)); + Delay<<<1, 1, 0, stream>>>(interval.count(), ticks_per_ms); + HIP_CHECK(hipGetLastError()); +} + +template +inline bool DeviceAttributesSupport(const int device, Attributes... attributes) { + constexpr auto DeviceAttributeSupport = [](const int device, + const hipDeviceAttribute_t attribute) { + int value = 0; + HIP_CHECK(hipDeviceGetAttribute(&value, attribute, device)); + return value; + }; + return (... && DeviceAttributeSupport(device, attributes)); +} \ No newline at end of file From a74fe2197e7d11e477ac7a4a35a4adc04c52a19f Mon Sep 17 00:00:00 2001 From: Dino Music Date: Thu, 6 Oct 2022 16:19:21 +0200 Subject: [PATCH 12/24] EXSWHTEC-94 - Remove c++14 standard constraint on memory tests --- tests/catch/unit/memory/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/catch/unit/memory/CMakeLists.txt b/tests/catch/unit/memory/CMakeLists.txt index f24c63ad8c..54e7708556 100644 --- a/tests/catch/unit/memory/CMakeLists.txt +++ b/tests/catch/unit/memory/CMakeLists.txt @@ -183,5 +183,4 @@ endif() hip_add_exe_to_target(NAME MemoryTest TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests - COMPILE_OPTIONS -std=c++14) + TEST_TARGET_NAME build_tests) From 350958e5e6bfa2efa722fefcbff09ac0e4e35f9a Mon Sep 17 00:00:00 2001 From: Dino Music Date: Thu, 6 Oct 2022 12:09:41 -0400 Subject: [PATCH 13/24] EXSWHTEC-94 - Remove GenerateLinearAllocationFlagCombinations until finished --- tests/catch/include/resource_guards.hh | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/tests/catch/include/resource_guards.hh b/tests/catch/include/resource_guards.hh index f8d1688312..7e6179c81a 100644 --- a/tests/catch/include/resource_guards.hh +++ b/tests/catch/include/resource_guards.hh @@ -122,23 +122,4 @@ class StreamGuard { private: const Streams stream_type_; hipStream_t stream_; -}; - -inline unsigned int GenerateLinearAllocationFlagCombinations(const LinearAllocs allocation_type) { - switch (allocation_type) { - case LinearAllocs::mallocAndRegister: - // TODO - return 0; - case LinearAllocs::hipHostMalloc: - return GENERATE(hipHostMallocDefault, hipHostMallocPortable, hipHostMallocMapped, - hipHostMallocWriteCombined); - case LinearAllocs::hipMallocManaged: - // TODO - return 1u; - case LinearAllocs::malloc: - case LinearAllocs::hipMalloc: - return 0u; - default: - assert("Invalid LinearAllocs enumerator"); - } -} \ No newline at end of file +}; \ No newline at end of file From 691d00ed3c7d4c7f34cbe1ea77bb28be34fd7239 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Thu, 6 Oct 2022 15:47:34 +0200 Subject: [PATCH 14/24] EXSWHTEC-94 - Implement helper classes and functions for memory tests --- tests/catch/include/resource_guards.hh | 144 +++++++++++++++++++++++++ tests/catch/include/utils.hh | 102 ++++++++++++++++++ 2 files changed, 246 insertions(+) create mode 100644 tests/catch/include/resource_guards.hh create mode 100644 tests/catch/include/utils.hh diff --git a/tests/catch/include/resource_guards.hh b/tests/catch/include/resource_guards.hh new file mode 100644 index 0000000000..f8d1688312 --- /dev/null +++ b/tests/catch/include/resource_guards.hh @@ -0,0 +1,144 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include + +enum class LinearAllocs { + malloc, + mallocAndRegister, + hipHostMalloc, + hipMalloc, + hipMallocManaged, +}; + +template class LinearAllocGuard { + public: + LinearAllocGuard(const LinearAllocs allocation_type, const size_t size, + const unsigned int flags = 0u) + : allocation_type_{allocation_type} { + switch (allocation_type_) { + case LinearAllocs::malloc: + ptr_ = host_ptr_ = reinterpret_cast(malloc(size)); + break; + case LinearAllocs::mallocAndRegister: + host_ptr_ = reinterpret_cast(malloc(size)); + HIP_CHECK(hipHostRegister(host_ptr_, size, flags)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&ptr_), host_ptr_, 0u)); + break; + case LinearAllocs::hipHostMalloc: + HIP_CHECK(hipHostMalloc(reinterpret_cast(&ptr_), size, flags)); + host_ptr_ = ptr_; + break; + case LinearAllocs::hipMalloc: + HIP_CHECK(hipMalloc(reinterpret_cast(&ptr_), size)); + break; + case LinearAllocs::hipMallocManaged: + HIP_CHECK(hipMallocManaged(reinterpret_cast(&ptr_), size, flags ? flags : 1u)); + host_ptr_ = ptr_; + } + } + + LinearAllocGuard(const LinearAllocGuard&) = delete; + LinearAllocGuard(LinearAllocGuard&&) = delete; + + ~LinearAllocGuard() { + // No Catch macros, don't want to possibly throw in the destructor + switch (allocation_type_) { + case LinearAllocs::malloc: + free(ptr_); + break; + case LinearAllocs::mallocAndRegister: + // Cast to void to suppress nodiscard warnings + static_cast(hipHostUnregister(host_ptr_)); + free(host_ptr_); + break; + case LinearAllocs::hipHostMalloc: + static_cast(hipHostFree(ptr_)); + break; + case LinearAllocs::hipMalloc: + case LinearAllocs::hipMallocManaged: + static_cast(hipFree(ptr_)); + } + } + + T* ptr() { return ptr_; }; + T* const ptr() const { return ptr_; }; + T* host_ptr() { return host_ptr_; } + T* const host_ptr() const { return host_ptr(); } + + private: + const LinearAllocs allocation_type_; + T* ptr_ = nullptr; + T* host_ptr_ = nullptr; +}; + +enum class Streams { nullstream, perThread, created }; + +class StreamGuard { + public: + StreamGuard(const Streams stream_type) : stream_type_{stream_type} { + switch (stream_type_) { + case Streams::nullstream: + stream_ = nullptr; + break; + case Streams::perThread: + stream_ = hipStreamPerThread; + break; + case Streams::created: + HIP_CHECK(hipStreamCreate(&stream_)); + } + } + + StreamGuard(const StreamGuard&) = delete; + StreamGuard(StreamGuard&&) = delete; + + ~StreamGuard() { + if (stream_type_ == Streams::created) { + static_cast(hipStreamDestroy(stream_)); + } + } + + hipStream_t stream() const { return stream_; } + + private: + const Streams stream_type_; + hipStream_t stream_; +}; + +inline unsigned int GenerateLinearAllocationFlagCombinations(const LinearAllocs allocation_type) { + switch (allocation_type) { + case LinearAllocs::mallocAndRegister: + // TODO + return 0; + case LinearAllocs::hipHostMalloc: + return GENERATE(hipHostMallocDefault, hipHostMallocPortable, hipHostMallocMapped, + hipHostMallocWriteCombined); + case LinearAllocs::hipMallocManaged: + // TODO + return 1u; + case LinearAllocs::malloc: + case LinearAllocs::hipMalloc: + return 0u; + default: + assert("Invalid LinearAllocs enumerator"); + } +} \ No newline at end of file diff --git a/tests/catch/include/utils.hh b/tests/catch/include/utils.hh new file mode 100644 index 0000000000..9edffc6f7c --- /dev/null +++ b/tests/catch/include/utils.hh @@ -0,0 +1,102 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include + +#include +#include + +namespace { +inline constexpr size_t kPageSize = 4096; +} // anonymous namespace + +template +void ArrayMismatch(T* const expected, T* const actual, const size_t num_elements) { + const auto ret = std::mismatch(expected, expected + num_elements, actual); + if (ret.first != expected + num_elements) { + const auto idx = std::distance(expected, ret.first); + INFO("Value mismatch at index: " << idx); + REQUIRE(expected[idx] == actual[idx]); + } +} + +template void ArrayFindIfNot(It begin, It end, const T expected_value) { + const auto it = std::find_if_not( + begin, end, [expected_value](const int elem) { return expected_value == elem; }); + + if (it != end) { + const auto idx = std::distance(begin, it); + INFO("Value mismatch at index " << idx); + REQUIRE(expected_value == *it); + } +} + +template +void ArrayFindIfNot(T* const array, const T expected_value, const size_t num_elements) { + ArrayFindIfNot(array, array + num_elements, expected_value); +} + +template +__global__ void VectorIncrement(T* const vec, const T increment_value, size_t N) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N; i += stride) { + vec[i] += increment_value; + } +} + +template __global__ void VectorSet(T* const vec, const T value, size_t N) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N; i += stride) { + vec[i] = value; + } +} + +// Will execute for atleast interval milliseconds +static __global__ void Delay(uint32_t interval, const uint32_t ticks_per_ms) { + while (interval--) { + uint64_t start = clock(); + while (clock() - start < ticks_per_ms) { + } + } +} + +inline void LaunchDelayKernel(const std::chrono::milliseconds interval, const hipStream_t stream) { + int ticks_per_ms = 0; + // Clock rate is in kHz => number of clock ticks in a millisecond + HIP_CHECK(hipDeviceGetAttribute(&ticks_per_ms, hipDeviceAttributeClockRate, 0)); + Delay<<<1, 1, 0, stream>>>(interval.count(), ticks_per_ms); + HIP_CHECK(hipGetLastError()); +} + +template +inline bool DeviceAttributesSupport(const int device, Attributes... attributes) { + constexpr auto DeviceAttributeSupport = [](const int device, + const hipDeviceAttribute_t attribute) { + int value = 0; + HIP_CHECK(hipDeviceGetAttribute(&value, attribute, device)); + return value; + }; + return (... && DeviceAttributeSupport(device, attributes)); +} \ No newline at end of file From 7bdf52f994ff486405fc72012cf98a5963a90442 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Thu, 6 Oct 2022 16:19:21 +0200 Subject: [PATCH 15/24] EXSWHTEC-94 - Remove c++14 standard constraint on memory tests --- tests/catch/unit/memory/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/catch/unit/memory/CMakeLists.txt b/tests/catch/unit/memory/CMakeLists.txt index f24c63ad8c..54e7708556 100644 --- a/tests/catch/unit/memory/CMakeLists.txt +++ b/tests/catch/unit/memory/CMakeLists.txt @@ -183,5 +183,4 @@ endif() hip_add_exe_to_target(NAME MemoryTest TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests - COMPILE_OPTIONS -std=c++14) + TEST_TARGET_NAME build_tests) From 1185c3973331e76e1f5d23e7864b4bff89a4c922 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Thu, 6 Oct 2022 12:09:41 -0400 Subject: [PATCH 16/24] EXSWHTEC-94 - Remove GenerateLinearAllocationFlagCombinations until finished --- tests/catch/include/resource_guards.hh | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/tests/catch/include/resource_guards.hh b/tests/catch/include/resource_guards.hh index f8d1688312..7e6179c81a 100644 --- a/tests/catch/include/resource_guards.hh +++ b/tests/catch/include/resource_guards.hh @@ -122,23 +122,4 @@ class StreamGuard { private: const Streams stream_type_; hipStream_t stream_; -}; - -inline unsigned int GenerateLinearAllocationFlagCombinations(const LinearAllocs allocation_type) { - switch (allocation_type) { - case LinearAllocs::mallocAndRegister: - // TODO - return 0; - case LinearAllocs::hipHostMalloc: - return GENERATE(hipHostMallocDefault, hipHostMallocPortable, hipHostMallocMapped, - hipHostMallocWriteCombined); - case LinearAllocs::hipMallocManaged: - // TODO - return 1u; - case LinearAllocs::malloc: - case LinearAllocs::hipMalloc: - return 0u; - default: - assert("Invalid LinearAllocs enumerator"); - } -} \ No newline at end of file +}; \ No newline at end of file From 8911eb7cd62641f50c1555f19118f764ff3ae7bf Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Wed, 12 Oct 2022 10:25:11 +0200 Subject: [PATCH 17/24] EXSWHTEC-94 - Implement resource guards for hipMallocPitch and 3D allocations. --- tests/catch/include/resource_guards.hh | 55 ++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tests/catch/include/resource_guards.hh b/tests/catch/include/resource_guards.hh index 7e6179c81a..0db1276f15 100644 --- a/tests/catch/include/resource_guards.hh +++ b/tests/catch/include/resource_guards.hh @@ -91,6 +91,61 @@ template class LinearAllocGuard { T* host_ptr_ = nullptr; }; +template class LinearAllocGuardMultiDim { + protected: + LinearAllocGuardMultiDim(hipExtent extent) + : extent_{extent} {} + + ~LinearAllocGuardMultiDim() { + static_cast(hipFree(pitched_ptr_.ptr)); + } + + public: + T* ptr() const { return reinterpret_cast(pitched_ptr_.ptr); }; + + size_t pitch() const { return pitched_ptr_.pitch; } + + hipExtent extent() const { return extent_; } + + hipPitchedPtr pitched_ptr() const { return pitched_ptr_; } + + size_t width() const { return extent_.width; } + + size_t width_logical() const { return extent_.width / sizeof(T); } + + size_t height() const { return extent_.height; } + + public: + hipPitchedPtr pitched_ptr_; + const hipExtent extent_; +}; + +template class LinearAllocGuard2D : public LinearAllocGuardMultiDim { + public: + LinearAllocGuard2D(const size_t width_logical, const size_t height) + : LinearAllocGuardMultiDim{make_hipExtent(width_logical * sizeof(T), height, 1)} + { + HIP_CHECK(hipMallocPitch(&this->pitched_ptr_.ptr, &this->pitched_ptr_.pitch, this->extent_.width, this->extent_.height)); + } + + LinearAllocGuard2D(const LinearAllocGuard2D&) = delete; + LinearAllocGuard2D(LinearAllocGuard2D&&) = delete; +}; + +template class LinearAllocGuard3D : public LinearAllocGuardMultiDim { + public: + LinearAllocGuard3D(const size_t width_logical, const size_t height, const size_t depth) + : LinearAllocGuardMultiDim{make_hipExtent(width_logical * sizeof(T), height, depth)} + { + HIP_CHECK(hipMalloc3D(&this->pitched_ptr_, this->extent_)); + } + + LinearAllocGuard3D(const LinearAllocGuard3D&) = delete; + LinearAllocGuard3D(LinearAllocGuard3D&&) = delete; + + size_t depth() const { return this->extent_.depth; } +}; + enum class Streams { nullstream, perThread, created }; class StreamGuard { From 76c8e3104c5881e6469b9229d2226011e175e4b7 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Fri, 14 Oct 2022 19:47:44 +0200 Subject: [PATCH 18/24] EXSWHTEC-94 - Add resource guards for 2D and 3D allocations and utils for handling pitched memory --- tests/catch/include/resource_guards.hh | 10 +++--- tests/catch/include/utils.hh | 43 ++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/tests/catch/include/resource_guards.hh b/tests/catch/include/resource_guards.hh index 0db1276f15..b3ef7813f7 100644 --- a/tests/catch/include/resource_guards.hh +++ b/tests/catch/include/resource_guards.hh @@ -80,10 +80,8 @@ template class LinearAllocGuard { } } - T* ptr() { return ptr_; }; - T* const ptr() const { return ptr_; }; - T* host_ptr() { return host_ptr_; } - T* const host_ptr() const { return host_ptr(); } + T* ptr() const { return ptr_; }; + T* host_ptr() const { return host_ptr_; } private: const LinearAllocs allocation_type_; @@ -140,6 +138,10 @@ template class LinearAllocGuard3D : public LinearAllocGuardMultiDim HIP_CHECK(hipMalloc3D(&this->pitched_ptr_, this->extent_)); } + LinearAllocGuard3D(const hipExtent extent) : LinearAllocGuardMultiDim(extent) { + HIP_CHECK(hipMalloc3D(&this->pitched_ptr_, this->extent_)); + } + LinearAllocGuard3D(const LinearAllocGuard3D&) = delete; LinearAllocGuard3D(LinearAllocGuard3D&&) = delete; diff --git a/tests/catch/include/utils.hh b/tests/catch/include/utils.hh index 9edffc6f7c..05eecea79f 100644 --- a/tests/catch/include/utils.hh +++ b/tests/catch/include/utils.hh @@ -54,6 +54,37 @@ void ArrayFindIfNot(T* const array, const T expected_value, const size_t num_ele ArrayFindIfNot(array, array + num_elements, expected_value); } +template +void PitchedMemoryVerify(T* const ptr, const size_t pitch, const size_t width, const size_t height, + const size_t depth, F expected_value_generator) { + for (int z = 0; z < depth; ++z) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const auto slice = reinterpret_cast(ptr) + pitch * height * z; + const auto row = slice + pitch * y; + if (reinterpret_cast(row)[x] != expected_value_generator(x, y, z)) { + INFO("Mismatch at indices: " << x << ", " << y << ", " << z); + REQUIRE(reinterpret_cast(row)[x] == expected_value_generator(x, y, z)); + } + } + } + } +} + +template +void PitchedMemorySet(T* const ptr, const size_t pitch, const size_t width, const size_t height, + const size_t depth, F expected_value_generator) { + for (int z = 0; z < depth; ++z) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const auto slice = reinterpret_cast(ptr) + pitch * height * z; + const auto row = slice + pitch * y; + reinterpret_cast(row)[x] = expected_value_generator(x, y, z); + } + } + } +} + template __global__ void VectorIncrement(T* const vec, const T increment_value, size_t N) { size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); @@ -82,6 +113,18 @@ static __global__ void Delay(uint32_t interval, const uint32_t ticks_per_ms) { } } +template +__global__ void Iota(T* const out, size_t pitch, size_t w, size_t h, size_t d) { + const auto x = blockIdx.x * blockDim.x + threadIdx.x; + const auto y = blockIdx.y * blockDim.y + threadIdx.y; + const auto z = blockIdx.z * blockDim.z + threadIdx.z; + if (x < w && y < h && z < d) { + char* const slice = reinterpret_cast(out) + pitch * h * z; + char* const row = slice + pitch * y; + reinterpret_cast(row)[x] = z * w * h + y * w + x; + } +} + inline void LaunchDelayKernel(const std::chrono::milliseconds interval, const hipStream_t stream) { int ticks_per_ms = 0; // Clock rate is in kHz => number of clock ticks in a millisecond From a724cb59c15e64b40eba691b7fb50b57b1e6a0d4 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Mon, 17 Oct 2022 01:44:26 -0400 Subject: [PATCH 19/24] EXSWHTEC-83 - Add return value for default branches in helper functions --- tests/catch/unit/memory/hipMemAdvise.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/catch/unit/memory/hipMemAdvise.cc b/tests/catch/unit/memory/hipMemAdvise.cc index 37181e36c1..58b251045b 100644 --- a/tests/catch/unit/memory/hipMemAdvise.cc +++ b/tests/catch/unit/memory/hipMemAdvise.cc @@ -34,6 +34,7 @@ static inline hipMemoryAdvise GetUnsetMemAdvice(const hipMemoryAdvise advice) { return hipMemAdviseUnsetPreferredLocation; default: assert("Invalid hipMemoryAdvise enumerator"); + return advice; } } @@ -47,6 +48,7 @@ static inline hipMemRangeAttribute GetMemAdviceAttr(const hipMemoryAdvise advice return hipMemRangeAttributePreferredLocation; default: assert("Invalid hipMemoryAdvise enumerator"); + return static_cast(-1); } } From 7734178657b4b586219695d617ede5f8e5a98777 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Tue, 18 Oct 2022 11:44:28 +0200 Subject: [PATCH 20/24] EXSWHTEC-94 - Implement resource guards for arrays. --- tests/catch/include/resource_guards.hh | 82 ++++++++++++++++++++------ 1 file changed, 65 insertions(+), 17 deletions(-) diff --git a/tests/catch/include/resource_guards.hh b/tests/catch/include/resource_guards.hh index b3ef7813f7..a9c7512a3d 100644 --- a/tests/catch/include/resource_guards.hh +++ b/tests/catch/include/resource_guards.hh @@ -19,6 +19,7 @@ THE SOFTWARE. #pragma once +#include #include #include @@ -90,15 +91,12 @@ template class LinearAllocGuard { }; template class LinearAllocGuardMultiDim { - protected: - LinearAllocGuardMultiDim(hipExtent extent) - : extent_{extent} {} + protected: + LinearAllocGuardMultiDim(hipExtent extent) : extent_{extent} {} - ~LinearAllocGuardMultiDim() { - static_cast(hipFree(pitched_ptr_.ptr)); - } - - public: + ~LinearAllocGuardMultiDim() { static_cast(hipFree(pitched_ptr_.ptr)); } + + public: T* ptr() const { return reinterpret_cast(pitched_ptr_.ptr); }; size_t pitch() const { return pitched_ptr_.pitch; } @@ -113,17 +111,17 @@ template class LinearAllocGuardMultiDim { size_t height() const { return extent_.height; } - public: + public: hipPitchedPtr pitched_ptr_; const hipExtent extent_; }; template class LinearAllocGuard2D : public LinearAllocGuardMultiDim { - public: - LinearAllocGuard2D(const size_t width_logical, const size_t height) - : LinearAllocGuardMultiDim{make_hipExtent(width_logical * sizeof(T), height, 1)} - { - HIP_CHECK(hipMallocPitch(&this->pitched_ptr_.ptr, &this->pitched_ptr_.pitch, this->extent_.width, this->extent_.height)); + public: + LinearAllocGuard2D(const size_t width_logical, const size_t height) + : LinearAllocGuardMultiDim{make_hipExtent(width_logical * sizeof(T), height, 1)} { + HIP_CHECK(hipMallocPitch(&this->pitched_ptr_.ptr, &this->pitched_ptr_.pitch, + this->extent_.width, this->extent_.height)); } LinearAllocGuard2D(const LinearAllocGuard2D&) = delete; @@ -131,10 +129,9 @@ template class LinearAllocGuard2D : public LinearAllocGuardMultiDim }; template class LinearAllocGuard3D : public LinearAllocGuardMultiDim { - public: + public: LinearAllocGuard3D(const size_t width_logical, const size_t height, const size_t depth) - : LinearAllocGuardMultiDim{make_hipExtent(width_logical * sizeof(T), height, depth)} - { + : LinearAllocGuardMultiDim{make_hipExtent(width_logical * sizeof(T), height, depth)} { HIP_CHECK(hipMalloc3D(&this->pitched_ptr_, this->extent_)); } @@ -148,6 +145,57 @@ template class LinearAllocGuard3D : public LinearAllocGuardMultiDim size_t depth() const { return this->extent_.depth; } }; +template class ArrayAllocGuard { + public: + // extent should contain logical width + ArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u) : extent_{extent} { + hipChannelFormatDesc desc = hipCreateChannelDesc(); + HIP_CHECK(hipMalloc3DArray(&ptr_, &desc, extent_, flags)); + } + + ~ArrayAllocGuard() { static_cast(hipFreeArray(ptr_)); } + + ArrayAllocGuard(const ArrayAllocGuard&) = delete; + ArrayAllocGuard(ArrayAllocGuard&&) = delete; + + hipArray_t ptr() const { return ptr_; } + + hipExtent extent() const { return extent_; } + + private: + hipArray_t ptr_ = nullptr; + const hipExtent extent_; +}; + +template class DrvArrayAllocGuard { + public: + // extent should contain width in bytes + DrvArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u) : extent_{extent} { + HIP_ARRAY3D_DESCRIPTOR desc{}; + using vec_info = vector_info; + desc.Format = vec_info::format; + desc.NumChannels = vec_info::size; + desc.Width = extent_.width / sizeof(T); + desc.Height = extent_.height; + desc.Depth = extent_.depth; + desc.Flags = flags; + HIP_CHECK(hipArray3DCreate(&ptr_, &desc)); + } + + ~DrvArrayAllocGuard() { static_cast(hipArrayDestroy(ptr_)); } + + DrvArrayAllocGuard(const DrvArrayAllocGuard&) = delete; + DrvArrayAllocGuard(DrvArrayAllocGuard&&) = delete; + + hiparray ptr() const { return ptr_; } + + hipExtent extent() const { return extent_; } + + private: + hiparray ptr_ = nullptr; + const hipExtent extent_; +}; + enum class Streams { nullstream, perThread, created }; class StreamGuard { From 1fd1cb0cdcde42e67f97c3aedf246b2061328132 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Tue, 18 Oct 2022 13:46:02 +0200 Subject: [PATCH 21/24] EXSWHTEC-94 - Add hip_array_common.hh. --- tests/catch/include/hip_array_common.hh | 84 +++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 tests/catch/include/hip_array_common.hh diff --git a/tests/catch/include/hip_array_common.hh b/tests/catch/include/hip_array_common.hh new file mode 100644 index 0000000000..fd6f094f8d --- /dev/null +++ b/tests/catch/include/hip_array_common.hh @@ -0,0 +1,84 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include + +template struct type_and_size_and_format { + using type = T; + static constexpr size_t size = N; + static constexpr hipArray_Format format = Format; +}; + +// Create a map of type to scalar type, vector size and scalar type format enum. +// This is useful for creating simpler function that depend on the vector size. +template struct vector_info; +template <> +struct vector_info : type_and_size_and_format {}; +template <> struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; + +template <> +struct vector_info : type_and_size_and_format {}; +template <> struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; + +template <> +struct vector_info : type_and_size_and_format {}; +template <> struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; \ No newline at end of file From fc3a10712fd51b28185e596ca2e013d8db79d454 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Tue, 18 Oct 2022 14:51:44 +0200 Subject: [PATCH 22/24] EXSWHTEC-94 - Remove redundancies between hip_array_common.hh and hipArrayCommon.hh. --- tests/catch/unit/memory/hipArray3DCreate.cc | 1 + tests/catch/unit/memory/hipArrayCommon.hh | 60 --------------------- tests/catch/unit/memory/hipArrayCreate.cc | 1 + tests/catch/unit/memory/hipFree.cc | 1 + tests/catch/unit/memory/hipMallocArray.cc | 1 + 5 files changed, 4 insertions(+), 60 deletions(-) diff --git a/tests/catch/unit/memory/hipArray3DCreate.cc b/tests/catch/unit/memory/hipArray3DCreate.cc index 973868eded..4cf189611b 100644 --- a/tests/catch/unit/memory/hipArray3DCreate.cc +++ b/tests/catch/unit/memory/hipArray3DCreate.cc @@ -20,6 +20,7 @@ THE SOFTWARE. #include #include "DriverContext.hh" #include "hipArrayCommon.hh" +#include "hip_array_common.hh" #include "hip_test_common.hh" namespace { diff --git a/tests/catch/unit/memory/hipArrayCommon.hh b/tests/catch/unit/memory/hipArrayCommon.hh index b40014b490..b0beeb3126 100644 --- a/tests/catch/unit/memory/hipArrayCommon.hh +++ b/tests/catch/unit/memory/hipArrayCommon.hh @@ -26,66 +26,6 @@ THE SOFTWARE. constexpr size_t BlockSize = 16; -template struct type_and_size_and_format { - using type = T; - static constexpr size_t size = N; - static constexpr hipArray_Format format = Format; -}; - -// Create a map of type to scalar type, vector size and scalar type format enum. -// This is useful for creating simpler function that depend on the vector size. -template struct vector_info; -template <> -struct vector_info : type_and_size_and_format {}; -template <> struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; - -template <> -struct vector_info : type_and_size_and_format {}; -template <> struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; - -template <> -struct vector_info : type_and_size_and_format {}; -template <> struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; - // read from a texture using normalized coordinates constexpr size_t ChannelToRead = 1; template diff --git a/tests/catch/unit/memory/hipArrayCreate.cc b/tests/catch/unit/memory/hipArrayCreate.cc index 6cc535593a..70a8636922 100644 --- a/tests/catch/unit/memory/hipArrayCreate.cc +++ b/tests/catch/unit/memory/hipArrayCreate.cc @@ -27,6 +27,7 @@ hipArrayCreate API test scenarios #include #include #include +#include #include "hipArrayCommon.hh" #include "DriverContext.hh" diff --git a/tests/catch/unit/memory/hipFree.cc b/tests/catch/unit/memory/hipFree.cc index 1248deebc1..b29854271c 100644 --- a/tests/catch/unit/memory/hipFree.cc +++ b/tests/catch/unit/memory/hipFree.cc @@ -22,6 +22,7 @@ THE SOFTWARE. #include +#include #include "hipArrayCommon.hh" #include "DriverContext.hh" diff --git a/tests/catch/unit/memory/hipMallocArray.cc b/tests/catch/unit/memory/hipMallocArray.cc index b6c4939b1e..530eb11077 100644 --- a/tests/catch/unit/memory/hipMallocArray.cc +++ b/tests/catch/unit/memory/hipMallocArray.cc @@ -26,6 +26,7 @@ hipMallocArray API test scenarios */ #include +#include #include #include #include "hipArrayCommon.hh" From 4e2fe8d999f45d3645a7714b5eff908eeaee64e3 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Fri, 4 Nov 2022 12:11:48 +0100 Subject: [PATCH 23/24] EXSWHTEC-94 - Fix loop counter types in PitchedMemoryVerify and PitchedMemorySet --- tests/catch/include/utils.hh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/catch/include/utils.hh b/tests/catch/include/utils.hh index 05eecea79f..bbab2322fe 100644 --- a/tests/catch/include/utils.hh +++ b/tests/catch/include/utils.hh @@ -57,9 +57,9 @@ void ArrayFindIfNot(T* const array, const T expected_value, const size_t num_ele template void PitchedMemoryVerify(T* const ptr, const size_t pitch, const size_t width, const size_t height, const size_t depth, F expected_value_generator) { - for (int z = 0; z < depth; ++z) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { + for (size_t z = 0; z < depth; ++z) { + for (size_t y = 0; y < height; ++y) { + for (size_t x = 0; x < width; ++x) { const auto slice = reinterpret_cast(ptr) + pitch * height * z; const auto row = slice + pitch * y; if (reinterpret_cast(row)[x] != expected_value_generator(x, y, z)) { @@ -74,9 +74,9 @@ void PitchedMemoryVerify(T* const ptr, const size_t pitch, const size_t width, c template void PitchedMemorySet(T* const ptr, const size_t pitch, const size_t width, const size_t height, const size_t depth, F expected_value_generator) { - for (int z = 0; z < depth; ++z) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { + for (size_t z = 0; z < depth; ++z) { + for (size_t y = 0; y < height; ++y) { + for (size_t x = 0; x < width; ++x) { const auto slice = reinterpret_cast(ptr) + pitch * height * z; const auto row = slice + pitch * y; reinterpret_cast(row)[x] = expected_value_generator(x, y, z); From c4def8e2a3ca29a7e417abd28636e239f072dff1 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Mon, 7 Nov 2022 10:10:16 +0100 Subject: [PATCH 24/24] EXSWHTEC-83 - Disable test that fail due to defects --- .../config/config_amd_linux_common.json | 4 ++- .../config/config_amd_windows_common.json | 4 ++- tests/catch/unit/memory/hipMemAdvise.cc | 29 +++++++++++++++++-- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/tests/catch/hipTestMain/config/config_amd_linux_common.json b/tests/catch/hipTestMain/config/config_amd_linux_common.json index 2a857c27d0..33e539d02c 100644 --- a/tests/catch/hipTestMain/config/config_amd_linux_common.json +++ b/tests/catch/hipTestMain/config/config_amd_linux_common.json @@ -7,6 +7,8 @@ "Unit_hipDeviceGetCacheConfig_Positive_Threaded", "Unit_hipGetDeviceFlags_Positive_Context", "Unit_hipIpcCloseMemHandle_Negative_Close_In_Originating_Process", - "Unit_hipDeviceGetPCIBusId_Negative_PartialFill" + "Unit_hipDeviceGetPCIBusId_Negative_PartialFill", + "Unit_hipMemAdvise_AccessedBy_All_Devices", + "Unit_hipMemAdvise_No_Flag_Interference" ] } diff --git a/tests/catch/hipTestMain/config/config_amd_windows_common.json b/tests/catch/hipTestMain/config/config_amd_windows_common.json index 3c8570f63d..9f32d0dd65 100644 --- a/tests/catch/hipTestMain/config/config_amd_windows_common.json +++ b/tests/catch/hipTestMain/config/config_amd_windows_common.json @@ -100,6 +100,8 @@ "Unit_hipStreamValue_Wait64_Blocking_NoMask_Nor", "Unit_hipGetDeviceFlags_Positive_Context", "Unit_hipIpcCloseMemHandle_Negative_Close_In_Originating_Process", - "Unit_hipDeviceGetPCIBusId_Negative_PartialFill" + "Unit_hipDeviceGetPCIBusId_Negative_PartialFill", + "Unit_hipMemAdvise_AccessedBy_All_Devices", + "Unit_hipMemAdvise_No_Flag_Interference" ] } diff --git a/tests/catch/unit/memory/hipMemAdvise.cc b/tests/catch/unit/memory/hipMemAdvise.cc index c43b47ab4e..5819aee9ee 100644 --- a/tests/catch/unit/memory/hipMemAdvise.cc +++ b/tests/catch/unit/memory/hipMemAdvise.cc @@ -87,7 +87,10 @@ TEST_CASE("Unit_hipMemAdvise_Set_Unset_Basic") { REQUIRE((advice == hipMemAdviseSetReadMostly ? 0 : hipInvalidDeviceId) == attribute); }; +// Disabled due to defect - EXSWHTEC-132 +#if HT_NVIDIA SECTION("hipMemAdviseSetAccessedBy") { SetUnset(hipMemAdviseSetAccessedBy); } +#endif SECTION("hipMemAdviseSetReadMostly") { SetUnset(hipMemAdviseSetReadMostly); } SECTION("hipMemAdviseSetPreferredLocation") { SetUnset(hipMemAdviseSetPreferredLocation); } } @@ -171,7 +174,10 @@ TEST_CASE("Unit_hipMemAdvise_Flags_Do_Not_Cause_Prefetch") { GENERATE_COPY(from_range(std::begin(supported_devices), std::end(supported_devices))); SECTION("hipMemAdviseSetPreferredLocation") { Test(device, hipMemAdviseSetPreferredLocation); } +// Disabled on AMD due to defect - EXSWHTEC-132 +#if HT_NVIDIA SECTION("hipMemAdviseSetAccessedBy") { Test(device, hipMemAdviseSetAccessedBy); } +#endif } TEST_CASE("Unit_hipMemAdvise_Read_Write_After_Advise") { @@ -201,13 +207,21 @@ TEST_CASE("Unit_hipMemAdvise_Read_Write_After_Advise") { REQUIRE((advice == hipMemAdviseSetReadMostly ? 1 : device) == attribute); }; +// Disabled on AMD due to defect - EXSWHTEC-133 +#if HT_NVIDIA SECTION("ReadMostly") { ReadWriteManagedMemory(hipInvalidDeviceId, hipMemAdviseSetReadMostly); } +#endif supported_devices.push_back(hipCpuDeviceId); + const auto device = GENERATE_COPY(from_range(std::begin(supported_devices), std::end(supported_devices))); supported_devices.pop_back(); SECTION("PreferredLocation") { ReadWriteManagedMemory(device, hipMemAdviseSetPreferredLocation); } + +// Disabled on AMD due to defect - EXSWHTEC-132 +#if HT_NVIDIA SECTION("AccessedBy") { ReadWriteManagedMemory(device, hipMemAdviseSetAccessedBy); } +#endif } TEST_CASE("Unit_hipMemAdvise_Prefetch_After_Advise") { @@ -216,8 +230,10 @@ TEST_CASE("Unit_hipMemAdvise_Prefetch_After_Advise") { HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); } supported_devices.push_back(hipCpuDeviceId); - const auto advice = GENERATE(hipMemAdviseSetAccessedBy, hipMemAdviseSetReadMostly, - hipMemAdviseSetPreferredLocation); + const auto advice = GENERATE(hipMemAdviseSetReadMostly, hipMemAdviseSetPreferredLocation + // Skipped due to defect - EXSWHTEC - 132 + // hipMemAdviseSetAccessedBy + ); const auto device = GENERATE_COPY(from_range(supported_devices)); LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); @@ -266,10 +282,13 @@ TEST_CASE("Unit_hipMemAdvise_Negative_Parameters") { LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); +// Disabled on NVIDIA due to defect - EXSWHTEC-122 +#if HT_AMD SECTION("Invalid advice") { HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), kPageSize, static_cast(-1), device), hipErrorInvalidValue); } +#endif const auto advice = GENERATE(hipMemAdviseSetAccessedBy, hipMemAdviseSetReadMostly, hipMemAdviseSetPreferredLocation); @@ -277,9 +296,12 @@ TEST_CASE("Unit_hipMemAdvise_Negative_Parameters") { HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), 0, advice, device), hipErrorInvalidValue); } +// Disabled due to defect - EXSWHTEC-131 +#if HT_NVIDIA SECTION("count larger than allocation size") { HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), kPageSize + 1, advice, device), hipErrorInvalidValue); } +#endif SECTION("dev_ptr == nullptr") { HIP_CHECK_ERROR(hipMemAdvise(nullptr, kPageSize, advice, device), hipErrorInvalidValue); @@ -290,8 +312,11 @@ TEST_CASE("Unit_hipMemAdvise_Negative_Parameters") { HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), kPageSize, advice, device), hipErrorInvalidValue); } +// Disabled on AMD due to defect - EXSWHTEC-130 +#if HT_NVIDIA SECTION("Invalid device") { HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), kPageSize, advice, hipInvalidDeviceId), (advice == hipMemAdviseSetReadMostly ? hipSuccess : hipErrorInvalidDevice)); } +#endif } \ No newline at end of file