diff --git a/tests/catch/hipTestMain/config/config_amd_linux_common.json b/tests/catch/hipTestMain/config/config_amd_linux_common.json index b3a7503222..7107416167 100644 --- a/tests/catch/hipTestMain/config/config_amd_linux_common.json +++ b/tests/catch/hipTestMain/config/config_amd_linux_common.json @@ -10,6 +10,8 @@ "Unit_hipGetDeviceFlags_Positive_Context", "Unit_hipIpcCloseMemHandle_Negative_Close_In_Originating_Process", "Unit_hipIpcOpenMemHandle_Negative_Open_In_Creating_Process", - "Unit_hipDeviceGetPCIBusId_Negative_PartialFill" + "Unit_hipDeviceGetPCIBusId_Negative_PartialFill", + "Unit_hipMemAdvise_AccessedBy_All_Devices", + "Unit_hipMemAdvise_No_Flag_Interference" ] } diff --git a/tests/catch/hipTestMain/config/config_amd_windows_common.json b/tests/catch/hipTestMain/config/config_amd_windows_common.json index 17ae9eb145..9c38e8d959 100644 --- a/tests/catch/hipTestMain/config/config_amd_windows_common.json +++ b/tests/catch/hipTestMain/config/config_amd_windows_common.json @@ -79,6 +79,8 @@ "Unit_hipIpcOpenMemHandle_Negative_Open_In_Creating_Process", "Unit_hipDeviceGetPCIBusId_Negative_PartialFill", "Unit_hipDeviceGetSharedMemConfig_Positive_Basic", - "Unit_hipDeviceGetSharedMemConfig_Positive_Threaded" + "Unit_hipDeviceGetSharedMemConfig_Positive_Threaded", + "Unit_hipMemAdvise_AccessedBy_All_Devices", + "Unit_hipMemAdvise_No_Flag_Interference" ] } diff --git a/tests/catch/include/hip_array_common.hh b/tests/catch/include/hip_array_common.hh new file mode 100644 index 0000000000..fd6f094f8d --- /dev/null +++ b/tests/catch/include/hip_array_common.hh @@ -0,0 +1,84 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include + +template struct type_and_size_and_format { + using type = T; + static constexpr size_t size = N; + static constexpr hipArray_Format format = Format; +}; + +// Create a map of type to scalar type, vector size and scalar type format enum. +// This is useful for creating simpler function that depend on the vector size. 
+template struct vector_info; +template <> +struct vector_info : type_and_size_and_format {}; +template <> struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; + +template <> +struct vector_info : type_and_size_and_format {}; +template <> struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; + +template <> +struct vector_info : type_and_size_and_format {}; +template <> struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; \ No newline at end of file diff --git a/tests/catch/include/resource_guards.hh b/tests/catch/include/resource_guards.hh index 7e6179c81a..a9c7512a3d 100644 --- a/tests/catch/include/resource_guards.hh +++ b/tests/catch/include/resource_guards.hh @@ -19,6 +19,7 @@ THE SOFTWARE. 
#pragma once +#include #include #include @@ -80,10 +81,8 @@ template class LinearAllocGuard { } } - T* ptr() { return ptr_; }; - T* const ptr() const { return ptr_; }; - T* host_ptr() { return host_ptr_; } - T* const host_ptr() const { return host_ptr(); } + T* ptr() const { return ptr_; }; + T* host_ptr() const { return host_ptr_; } private: const LinearAllocs allocation_type_; @@ -91,6 +90,112 @@ template class LinearAllocGuard { T* host_ptr_ = nullptr; }; +template class LinearAllocGuardMultiDim { + protected: + LinearAllocGuardMultiDim(hipExtent extent) : extent_{extent} {} + + ~LinearAllocGuardMultiDim() { static_cast(hipFree(pitched_ptr_.ptr)); } + + public: + T* ptr() const { return reinterpret_cast(pitched_ptr_.ptr); }; + + size_t pitch() const { return pitched_ptr_.pitch; } + + hipExtent extent() const { return extent_; } + + hipPitchedPtr pitched_ptr() const { return pitched_ptr_; } + + size_t width() const { return extent_.width; } + + size_t width_logical() const { return extent_.width / sizeof(T); } + + size_t height() const { return extent_.height; } + + public: + hipPitchedPtr pitched_ptr_; + const hipExtent extent_; +}; + +template class LinearAllocGuard2D : public LinearAllocGuardMultiDim { + public: + LinearAllocGuard2D(const size_t width_logical, const size_t height) + : LinearAllocGuardMultiDim{make_hipExtent(width_logical * sizeof(T), height, 1)} { + HIP_CHECK(hipMallocPitch(&this->pitched_ptr_.ptr, &this->pitched_ptr_.pitch, + this->extent_.width, this->extent_.height)); + } + + LinearAllocGuard2D(const LinearAllocGuard2D&) = delete; + LinearAllocGuard2D(LinearAllocGuard2D&&) = delete; +}; + +template class LinearAllocGuard3D : public LinearAllocGuardMultiDim { + public: + LinearAllocGuard3D(const size_t width_logical, const size_t height, const size_t depth) + : LinearAllocGuardMultiDim{make_hipExtent(width_logical * sizeof(T), height, depth)} { + HIP_CHECK(hipMalloc3D(&this->pitched_ptr_, this->extent_)); + } + + LinearAllocGuard3D(const 
hipExtent extent) : LinearAllocGuardMultiDim(extent) { + HIP_CHECK(hipMalloc3D(&this->pitched_ptr_, this->extent_)); + } + + LinearAllocGuard3D(const LinearAllocGuard3D&) = delete; + LinearAllocGuard3D(LinearAllocGuard3D&&) = delete; + + size_t depth() const { return this->extent_.depth; } +}; + +template class ArrayAllocGuard { + public: + // extent should contain logical width + ArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u) : extent_{extent} { + hipChannelFormatDesc desc = hipCreateChannelDesc(); + HIP_CHECK(hipMalloc3DArray(&ptr_, &desc, extent_, flags)); + } + + ~ArrayAllocGuard() { static_cast(hipFreeArray(ptr_)); } + + ArrayAllocGuard(const ArrayAllocGuard&) = delete; + ArrayAllocGuard(ArrayAllocGuard&&) = delete; + + hipArray_t ptr() const { return ptr_; } + + hipExtent extent() const { return extent_; } + + private: + hipArray_t ptr_ = nullptr; + const hipExtent extent_; +}; + +template class DrvArrayAllocGuard { + public: + // extent should contain width in bytes + DrvArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u) : extent_{extent} { + HIP_ARRAY3D_DESCRIPTOR desc{}; + using vec_info = vector_info; + desc.Format = vec_info::format; + desc.NumChannels = vec_info::size; + desc.Width = extent_.width / sizeof(T); + desc.Height = extent_.height; + desc.Depth = extent_.depth; + desc.Flags = flags; + HIP_CHECK(hipArray3DCreate(&ptr_, &desc)); + } + + ~DrvArrayAllocGuard() { static_cast(hipArrayDestroy(ptr_)); } + + DrvArrayAllocGuard(const DrvArrayAllocGuard&) = delete; + DrvArrayAllocGuard(DrvArrayAllocGuard&&) = delete; + + hiparray ptr() const { return ptr_; } + + hipExtent extent() const { return extent_; } + + private: + hiparray ptr_ = nullptr; + const hipExtent extent_; +}; + enum class Streams { nullstream, perThread, created }; class StreamGuard { diff --git a/tests/catch/include/utils.hh b/tests/catch/include/utils.hh index 9edffc6f7c..bbab2322fe 100644 --- a/tests/catch/include/utils.hh +++ 
b/tests/catch/include/utils.hh @@ -54,6 +54,37 @@ void ArrayFindIfNot(T* const array, const T expected_value, const size_t num_ele ArrayFindIfNot(array, array + num_elements, expected_value); } +template +void PitchedMemoryVerify(T* const ptr, const size_t pitch, const size_t width, const size_t height, + const size_t depth, F expected_value_generator) { + for (size_t z = 0; z < depth; ++z) { + for (size_t y = 0; y < height; ++y) { + for (size_t x = 0; x < width; ++x) { + const auto slice = reinterpret_cast(ptr) + pitch * height * z; + const auto row = slice + pitch * y; + if (reinterpret_cast(row)[x] != expected_value_generator(x, y, z)) { + INFO("Mismatch at indices: " << x << ", " << y << ", " << z); + REQUIRE(reinterpret_cast(row)[x] == expected_value_generator(x, y, z)); + } + } + } + } +} + +template +void PitchedMemorySet(T* const ptr, const size_t pitch, const size_t width, const size_t height, + const size_t depth, F expected_value_generator) { + for (size_t z = 0; z < depth; ++z) { + for (size_t y = 0; y < height; ++y) { + for (size_t x = 0; x < width; ++x) { + const auto slice = reinterpret_cast(ptr) + pitch * height * z; + const auto row = slice + pitch * y; + reinterpret_cast(row)[x] = expected_value_generator(x, y, z); + } + } + } +} + template __global__ void VectorIncrement(T* const vec, const T increment_value, size_t N) { size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); @@ -82,6 +113,18 @@ static __global__ void Delay(uint32_t interval, const uint32_t ticks_per_ms) { } } +template +__global__ void Iota(T* const out, size_t pitch, size_t w, size_t h, size_t d) { + const auto x = blockIdx.x * blockDim.x + threadIdx.x; + const auto y = blockIdx.y * blockDim.y + threadIdx.y; + const auto z = blockIdx.z * blockDim.z + threadIdx.z; + if (x < w && y < h && z < d) { + char* const slice = reinterpret_cast(out) + pitch * h * z; + char* const row = slice + pitch * y; + reinterpret_cast(row)[x] = z * w * h + y * w + x; + } +} + inline void 
LaunchDelayKernel(const std::chrono::milliseconds interval, const hipStream_t stream) { int ticks_per_ms = 0; // Clock rate is in kHz => number of clock ticks in a millisecond diff --git a/tests/catch/unit/memory/CMakeLists.txt b/tests/catch/unit/memory/CMakeLists.txt index 4d2d74c033..a0bfb6b5d4 100644 --- a/tests/catch/unit/memory/CMakeLists.txt +++ b/tests/catch/unit/memory/CMakeLists.txt @@ -97,6 +97,7 @@ set(TEST_SRC hipMemsetSync.cc hipMemsetAsync.cc hipMemAdvise.cc + hipMemAdvise_old.cc hipMemRangeGetAttributes.cc ) else() @@ -171,6 +172,7 @@ set(TEST_SRC hipMemsetSync.cc hipMemsetAsync.cc hipMemAdvise.cc + hipMemAdvise_old.cc hipMemRangeGetAttributes.cc ) endif() diff --git a/tests/catch/unit/memory/hipArray3DCreate.cc b/tests/catch/unit/memory/hipArray3DCreate.cc index 973868eded..4cf189611b 100644 --- a/tests/catch/unit/memory/hipArray3DCreate.cc +++ b/tests/catch/unit/memory/hipArray3DCreate.cc @@ -20,6 +20,7 @@ THE SOFTWARE. #include #include "DriverContext.hh" #include "hipArrayCommon.hh" +#include "hip_array_common.hh" #include "hip_test_common.hh" namespace { diff --git a/tests/catch/unit/memory/hipArrayCommon.hh b/tests/catch/unit/memory/hipArrayCommon.hh index b40014b490..b0beeb3126 100644 --- a/tests/catch/unit/memory/hipArrayCommon.hh +++ b/tests/catch/unit/memory/hipArrayCommon.hh @@ -26,66 +26,6 @@ THE SOFTWARE. constexpr size_t BlockSize = 16; -template struct type_and_size_and_format { - using type = T; - static constexpr size_t size = N; - static constexpr hipArray_Format format = Format; -}; - -// Create a map of type to scalar type, vector size and scalar type format enum. -// This is useful for creating simpler function that depend on the vector size. 
-template struct vector_info; -template <> -struct vector_info : type_and_size_and_format {}; -template <> struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; - -template <> -struct vector_info : type_and_size_and_format {}; -template <> struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; - -template <> -struct vector_info : type_and_size_and_format {}; -template <> struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; - // read from a texture using normalized coordinates constexpr size_t ChannelToRead = 1; template diff --git a/tests/catch/unit/memory/hipArrayCreate.cc b/tests/catch/unit/memory/hipArrayCreate.cc index 6cc535593a..70a8636922 100644 --- a/tests/catch/unit/memory/hipArrayCreate.cc +++ b/tests/catch/unit/memory/hipArrayCreate.cc @@ -27,6 +27,7 @@ hipArrayCreate API test scenarios #include #include #include +#include #include "hipArrayCommon.hh" #include "DriverContext.hh" diff --git a/tests/catch/unit/memory/hipFree.cc b/tests/catch/unit/memory/hipFree.cc index 1248deebc1..b29854271c 100644 --- 
a/tests/catch/unit/memory/hipFree.cc +++ b/tests/catch/unit/memory/hipFree.cc @@ -22,6 +22,7 @@ THE SOFTWARE. #include +#include #include "hipArrayCommon.hh" #include "DriverContext.hh" diff --git a/tests/catch/unit/memory/hipMallocArray.cc b/tests/catch/unit/memory/hipMallocArray.cc index b6c4939b1e..530eb11077 100644 --- a/tests/catch/unit/memory/hipMallocArray.cc +++ b/tests/catch/unit/memory/hipMallocArray.cc @@ -26,6 +26,7 @@ hipMallocArray API test scenarios */ #include +#include #include #include #include "hipArrayCommon.hh" diff --git a/tests/catch/unit/memory/hipMemAdvise.cc b/tests/catch/unit/memory/hipMemAdvise.cc index cef164e2ff..5819aee9ee 100644 --- a/tests/catch/unit/memory/hipMemAdvise.cc +++ b/tests/catch/unit/memory/hipMemAdvise.cc @@ -1,941 +1,322 @@ /* -Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -/* Test Case Description: - Scenario-1: The following Function Tests the working of flags which can be - assigned to HMM memory using hipMemAdvise() api - Scenario-2: Negative tests on hipMemAdvise() api - Scenario-3: The following function tests various scenarios around the flag - 'hipMemAdviseSetPreferredLocation' using HMM memory and hipMemAdvise() api - Scenario-4: The following function tests various scenarios around the flag - 'hipMemAdviseSetReadMostly' using HMM memory and hipMemAdvise() api - Scenario-5: The following function verifies if assigning of a flag - invalidates the earlier flag which was assigned to the same memory region - using hipMemAdvise() - Scenario-6: The following function tests if peers can set - hipMemAdviseSetAccessedBy flag - on HMM memory prefetched on each of the other gpus - Scenario-7: Set AccessedBy flag and check value returned by - hipMemRangeGetAttribute() It should be -2(same is observed on cuda) - Scenario-8: Set AccessedBy flag to device 0 on Hmm memory and prefetch the - memory to device 1, then probe for AccessedBy flag using - hipMemRangeGetAttribute() we should still see the said flag is set for - device 0 - Scenario-9: 1) Set AccessedBy to device 0 followed by PreferredLocation to - device 1 check for AccessedBy flag using hipMemRangeGetAttribute() it should - return 0 - 2) Unset AccessedBy to 0 and set it to device 1 followed by - PreferredLocation to device 1, check for AccessedBy flag using - hipMemRangeGetAttribute() it should return 1 - Scenario-10: Set AccessedBy flag to HMM memory launch a kernel and then unset - AccessedBy, launch kernel. We should not have any access issues - Scenario-11: Allocate memory using aligned_alloc(), assign PreferredLocation - flag to the allocated memory and launch a kernel. 
Kernel should get executed - successfully without hang or segfault - Scenario-12: Allocate Hmm memory, set advise to PreferredLocation and then - get attribute using the api hipMemRangeGetAttribute() for - hipMemRangeAttributeLastPrefetchLocation the value returned should be -2 - Scenario-13: Allocate HMM memory, set PreferredLocation to device 0, Prfetch - the mem to device1, probe for hipMemRangeAttributeLastPrefetchLocation using - hipMemRangeGetAttribute(), we should get 1 - Scenario-14: Allocate HMM memory, set ReadMostly followed by - PreferredLocation, probe for hipMemRangeAttributeReadMostly and - hipMemRangeAttributePreferredLocation - using hipMemRangeGetAttribute() we should observe 1 and 0 correspondingly. - In other words setting of hipMemRangeAttributePreferredLocation should not - impact hipMemRangeAttributeReadMostly advise to the memory - Scenario-15: Allocate Hmm memory, advise it to ReadMostly for gpu: 0 and - launch kernel on all other gpus except 0. This test case may discover any - effect or access denial case arising due to setting ReadMostly only to a - particular gpu +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
*/ #include -#if __linux__ -#include -#include -#include -#endif - -// Kernel function -__global__ void MemAdvseKernel(int n, int *x) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < n) - x[index] = x[index] * x[index]; -} - -// Kernel -__global__ void MemAdvise2(int *Hmm, int n) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - int stride = blockDim.x * gridDim.x; - for (int i = index; i < n; i += stride) { - Hmm[i] = Hmm[i] + 10; - } -} - -// Kernel -__global__ void MemAdvise3(int *Hmm, int *Hmm1, int n) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - int stride = blockDim.x * gridDim.x; - for (int i = index; i < n; i += stride) { - Hmm1[i] = Hmm[i] + 10; +#include +#include +#include + +static inline hipMemoryAdvise GetUnsetMemAdvice(const hipMemoryAdvise advice) { + switch (advice) { + case hipMemAdviseSetAccessedBy: + return hipMemAdviseUnsetAccessedBy; + case hipMemAdviseSetReadMostly: + return hipMemAdviseUnsetReadMostly; + case hipMemAdviseSetPreferredLocation: + return hipMemAdviseUnsetPreferredLocation; + default: + assert("Invalid hipMemoryAdvise enumerator"); + return advice; } } - -static bool CheckError(hipError_t err, int LineNo) { - if (err == hipSuccess) { - WARN("Error expected but received hipSuccess at line no.:" << LineNo); - return false; - } else { - return true; +static inline hipMemRangeAttribute GetMemAdviceAttr(const hipMemoryAdvise advice) { + switch (advice) { + case hipMemAdviseSetAccessedBy: + return hipMemRangeAttributeAccessedBy; + case hipMemAdviseSetReadMostly: + return hipMemRangeAttributeReadMostly; + case hipMemAdviseSetPreferredLocation: + return hipMemRangeAttributePreferredLocation; + default: + assert("Invalid hipMemoryAdvise enumerator"); + return static_cast(-1); } } -static int HmmAttrPrint() { - int managed = 0; - WARN("The following are the attribute values related to HMM for" - " device 0:\n"); - HIP_CHECK(hipDeviceGetAttribute(&managed, - 
hipDeviceAttributeDirectManagedMemAccessFromHost, 0)); - WARN("hipDeviceAttributeDirectManagedMemAccessFromHost: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributeConcurrentManagedAccess, 0)); - WARN("hipDeviceAttributeConcurrentManagedAccess: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributePageableMemoryAccess, 0)); - WARN("hipDeviceAttributePageableMemoryAccess: " << managed); - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributePageableMemoryAccessUsesHostPageTables, 0)); - WARN("hipDeviceAttributePageableMemoryAccessUsesHostPageTables:" << managed); - - HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, - 0)); - WARN("hipDeviceAttributeManagedMemory: " << managed); - return managed; -} - - -// The following Function Tests the working of flags which can be assigned -// to HMM memory using hipMemAdvise() api -TEST_CASE("Unit_hipMemAdvise_TstFlags") { - int MangdMem = HmmAttrPrint(); - if (MangdMem == 1) { - bool IfTestPassed = true; - int NumDevs = 0, *Outpt = nullptr; - int MEM_SIZE = 4*1024, A_CONST = 9999; - float *Hmm = nullptr; - int AttrVal = 0; - HIP_CHECK(hipGetDeviceCount(&NumDevs)); - Outpt = new int[NumDevs]; - HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE * 2, hipMemAttachGlobal)); - // With the following for loop we iterate through each of the Gpus in the - // system set and unset the flags and check the behavior. 
- for (int i = 0; i < NumDevs; ++i) { - HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, hipMemAdviseSetReadMostly, i)); - HIP_CHECK(hipMemRangeGetAttribute(&AttrVal, sizeof(AttrVal), - hipMemRangeAttributeReadMostly, Hmm, - MEM_SIZE * 2)); - if (AttrVal != 1) { - WARN("Attempt to set hipMemAdviseSetReadMostly flag failed!\n"); - IfTestPassed = false; - } - HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, hipMemAdviseUnsetReadMostly, - i)); - - HIP_CHECK(hipMemRangeGetAttribute(&AttrVal, sizeof(AttrVal), - hipMemRangeAttributeReadMostly, Hmm, - (MEM_SIZE * 2))); - if (AttrVal != 0) { - WARN("Attempt to Unset hipMemAdviseSetReadMostly flag failed!\n"); - IfTestPassed = false; - } - AttrVal = A_CONST; - // Currently hipMemAdviseSetPreferredLocation and - // hipMemAdviseSetAccessedBy - // flags are resulting in issues: SWDEV-267357 - HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, - hipMemAdviseSetPreferredLocation, i)); - HIP_CHECK(hipMemRangeGetAttribute(&AttrVal, sizeof(AttrVal), - hipMemRangeAttributePreferredLocation, - Hmm, (MEM_SIZE * 2))); - if (AttrVal != i) { - WARN("Attempt to set hipMemAdviseSetPreferredLocation flag failed!\n"); - IfTestPassed = false; - } - AttrVal = A_CONST; - HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, - hipMemAdviseUnsetPreferredLocation, i)); - HIP_CHECK(hipMemRangeGetAttribute(&AttrVal, sizeof(AttrVal), - hipMemRangeAttributePreferredLocation, - Hmm, (MEM_SIZE * 2))); - if (AttrVal == i) { - WARN("Attempt to Unset hipMemAdviseUnsetPreferredLocation "); - WARN("flag failed!\n"); - IfTestPassed = false; - } - for (int m = 0; m < NumDevs; ++m) { - Outpt[m] = A_CONST; - } - HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, hipMemAdviseSetAccessedBy, i)); - HIP_CHECK(hipMemRangeGetAttribute(Outpt, sizeof(Outpt), - hipMemRangeAttributeAccessedBy, Hmm, - (MEM_SIZE * 2))); - if ((Outpt[0]) != i) { - WARN("Attempt to set hipMemAdviseSetAccessedBy flag failed!\n"); - IfTestPassed = false; - } - for (int m = 0; m < NumDevs; ++m) { - Outpt[m] = A_CONST; - } - 
HIP_CHECK(hipMemAdvise(Hmm , MEM_SIZE * 2, hipMemAdviseUnsetAccessedBy, - i)); - HIP_CHECK(hipMemRangeGetAttribute(Outpt, sizeof(Outpt), - hipMemRangeAttributeAccessedBy, Hmm, - (MEM_SIZE * 2))); - if ((Outpt[0]) >= 0) { - WARN("Attempt to Unset hipMemAdviseUnsetAccessedBy flag failed!\n"); - IfTestPassed = false; - } +std::vector GetDevicesWithAdviseSupport() { + const auto device_count = HipTest::getDeviceCount(); + std::vector supported_devices; + supported_devices.reserve(device_count + 1); + for (int i = 0; i < device_count; ++i) { + if (DeviceAttributesSupport(i, hipDeviceAttributeManagedMemory, + hipDeviceAttributeConcurrentManagedAccess)) { + supported_devices.push_back(i); } - delete [] Outpt; - HIP_CHECK(hipFree(Hmm)); - REQUIRE(IfTestPassed); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); } + return supported_devices; } -TEST_CASE("Unit_hipMemAdvise_NegtveTsts") { - int MangdMem = HmmAttrPrint(); - if (MangdMem == 1) { - bool IfTestPassed = true; - int NumDevs = 0, MEM_SIZE = 4*1024; - float *Hmm = nullptr; - std::string str; - HIP_CHECK(hipGetDeviceCount(&NumDevs)); - HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE * 2, hipMemAttachGlobal)); -#if HT_AMD - // Passing invalid value(99) device param - IfTestPassed &= CheckError(hipMemAdvise(Hmm, MEM_SIZE * 2, - hipMemAdviseSetReadMostly, 99), __LINE__); - - // Passing invalid value(-12) device param - IfTestPassed &= CheckError(hipMemAdvise(Hmm, MEM_SIZE * 2, - hipMemAdviseSetReadMostly, -12), __LINE__); -#endif - // Passing NULL as first parameter instead of valid pointer to a memory - IfTestPassed &= CheckError(hipMemAdvise(NULL, MEM_SIZE * 2, - hipMemAdviseSetReadMostly, 0), __LINE__); - - // Passing 0 for count(2nd param) parameter - IfTestPassed &= CheckError(hipMemAdvise(Hmm, 0, hipMemAdviseSetReadMostly, - 0), __LINE__); - - // Passing count much more than actually allocated value - IfTestPassed &= 
CheckError(hipMemAdvise(Hmm, MEM_SIZE * 6, - hipMemAdviseSetReadMostly, 0), __LINE__); - - REQUIRE(IfTestPassed); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); +TEST_CASE("Unit_hipMemAdvise_Set_Unset_Basic") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); + return; } + supported_devices.push_back(hipCpuDeviceId); + const auto device = GENERATE_COPY(from_range(supported_devices)); + + const auto SetUnset = [=](const hipMemoryAdvise advice) { + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); + int32_t attribute = 0; + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, advice, device)); + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), GetMemAdviceAttr(advice), + alloc.ptr(), kPageSize)); + REQUIRE((advice == hipMemAdviseSetReadMostly ? 1 : device) == attribute); + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, GetUnsetMemAdvice(advice), device)); + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), GetMemAdviceAttr(advice), + alloc.ptr(), kPageSize)); + REQUIRE((advice == hipMemAdviseSetReadMostly ? 
0 : hipInvalidDeviceId) == attribute); + }; + +// Disabled due to defect - EXSWHTEC-132 +#if HT_NVIDIA + SECTION("hipMemAdviseSetAccessedBy") { SetUnset(hipMemAdviseSetAccessedBy); } +#endif + SECTION("hipMemAdviseSetReadMostly") { SetUnset(hipMemAdviseSetReadMostly); } + SECTION("hipMemAdviseSetPreferredLocation") { SetUnset(hipMemAdviseSetPreferredLocation); } } -// The following function tests various scenarios around the flag -// 'hipMemAdviseSetPreferredLocation' using HMM memory and hipMemAdvise() api -TEST_CASE("Unit_hipMemAdvise_PrefrdLoc") { - int MangdMem = HmmAttrPrint(); - if (MangdMem == 1) { - // Check that when a page fault occurs for the memory region set to devPtr, - // the data is migrated to the destn processor - int MEM_SIZE = 4096, A_CONST = 9999; - int *Hmm = nullptr, NumDevs = 0, dev = A_CONST; - bool IfTestPassed = true; - HIP_CHECK(hipGetDeviceCount(&NumDevs)); - HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE * 3, hipMemAttachGlobal)); - for (int i = 0; i < ((MEM_SIZE * 3)/4); ++i) { - Hmm[i] = 4; - } - for (int devId = 0; devId < NumDevs; ++devId) { - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE * 3, - hipMemAdviseSetPreferredLocation, devId)); - int NumElms = ((MEM_SIZE * 3)/4); - MemAdvseKernel<<>>(NumElms, Hmm); - int dev = A_CONST; - HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(dev), - hipMemRangeAttributePreferredLocation, - Hmm, MEM_SIZE * 3)); - if (dev != devId) { - WARN("Memory observed to be not available on expected location\n"); - WARN("line no: " << __LINE__); - WARN("dev: " << dev); - IfTestPassed = false; - } - } - - // Check that when preferred location is set for a memory region, - // data can still be prefetched using hipMemPrefetchAsync - hipStream_t strm; - dev = A_CONST; - for (int devId = 0; devId < NumDevs; ++devId) { - HIP_CHECK(hipSetDevice(devId)); - HIP_CHECK(hipStreamCreate(&strm)); - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE * 3, - hipMemAdviseSetPreferredLocation, devId)); - HIP_CHECK(hipMemPrefetchAsync(Hmm, MEM_SIZE * 3, 
devId, strm)); - HIP_CHECK(hipStreamSynchronize(strm)); - HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(dev), - hipMemRangeAttributeLastPrefetchLocation, - Hmm, MEM_SIZE * 3)); - if (dev != devId) { - WARN("Memory reported to be not available at the Prefetched "); - WARN("location with device id: " << devId); - WARN("line no: " << __LINE__); - WARN("dev: " << dev); - IfTestPassed = false; - } - HIP_CHECK(hipStreamDestroy(strm)); - } - HIP_CHECK(hipFree(Hmm)); - REQUIRE(IfTestPassed); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); +TEST_CASE("Unit_hipMemAdvise_No_Flag_Interference") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); + return; } -} - -// The following function tests various scenarios around the flag -// 'hipMemAdviseSetReadMostly' using HMM memory and hipMemAdvise() api - -TEST_CASE("Unit_hipMemAdvise_ReadMostly") { - int MangdMem = HmmAttrPrint(); - if (MangdMem == 1) { - bool IfTestPassed = true; - int MEM_SIZE = 4096, A_CONST = 9999; - float *Hmm = nullptr; - HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE)); - for (uint64_t i = 0; i < (MEM_SIZE/sizeof(float)); ++i) { - Hmm[i] = A_CONST; - } - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetReadMostly, 0)); - // Checking if the data can be read after setting hipMemAdviseSetReadMostly - for (uint64_t i = 0; i < (MEM_SIZE/sizeof(float)); ++i) { - if (Hmm[i] != A_CONST) { - WARN("Didn't find expected value in Hmm memory after setting"); - WARN(" hipMemAdviseSetReadMostly flag line no.: " << __LINE__); - IfTestPassed = false; - } - } + supported_devices.push_back(hipCpuDeviceId); + const auto device = GENERATE_COPY(from_range(supported_devices)); - // Checking if the memory region can be modified - for (uint64_t i = 0; i < (MEM_SIZE/sizeof(float)); ++i) { - Hmm[i] = A_CONST; - } 
+ std::array advice{hipMemAdviseSetReadMostly, hipMemAdviseSetPreferredLocation, + hipMemAdviseSetAccessedBy}; + for (int i = 0; i < 6; ++i) { + std::next_permutation(std::begin(advice), std::end(advice)); + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); - for (uint64_t i = 0; i < (MEM_SIZE/sizeof(float)); ++i) { - if (Hmm[i] != A_CONST) { - WARN("Didn't find expected value in Hmm memory after Modification\n"); - WARN("line no.: " << __LINE__); - IfTestPassed = false; - } + for (const auto a : advice) { + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, a, device)); } - int out = A_CONST; - HIP_CHECK(hipMemRangeGetAttribute(&out, 4, hipMemRangeAttributeReadMostly, - Hmm, MEM_SIZE)); - if (out != 1) { - WARN("out value: " << out); - IfTestPassed = false; + for (const auto a : advice) { + int32_t attribute = 0; + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), GetMemAdviceAttr(a), + alloc.ptr(), kPageSize)); + REQUIRE((a == hipMemAdviseSetReadMostly ? 1 : device) == attribute); } - // Checking the advise attribute after prefetch - HIP_CHECK(hipMemPrefetchAsync(Hmm, MEM_SIZE, 0, 0)); - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipMemRangeGetAttribute(&out, sizeof(int), - hipMemRangeAttributeReadMostly, Hmm, - MEM_SIZE)); - if (out != 1) { - WARN("Attribute assigned to memory changed after calling "); - WARN("hipMemPrefetchAsync(). line no.: " << __LINE__); - WARN("out value: " << out); - IfTestPassed = false; - } - HIP_CHECK(hipFree(Hmm)); - REQUIRE(IfTestPassed); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. 
Hence skipping the testing with Pass result.\n"); } } -// The following function verifies if assigning of a flag invalidates the -// earlier flag which was assigned to the same memory region using -// hipMemAdvise() -TEST_CASE("Unit_hipMemAdvise_TstFlgOverrideEffect") { - int MangdMem = HmmAttrPrint(); - if (MangdMem == 1) { - bool IfTestPassed = true; - int MEM_SIZE = 4*4096, A_CONST = 9999; - float *Hmm = nullptr; - int NumDevs = 0, dev = A_CONST; - - HIP_CHECK(hipGetDeviceCount(&NumDevs)); - HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE, hipMemAttachGlobal)); - for (int i = 0; i < NumDevs; ++i) { - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetReadMostly, i)); - HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(int), - hipMemRangeAttributeReadMostly, Hmm, - MEM_SIZE)); - if (dev != 1) { - WARN("hipMemAdviseSetReadMostly flag did not take affect despite "); - WARN("setting it using hipMemAdvise(). line no.: " << __LINE__); - IfTestPassed = false; - break; - } - dev = A_CONST; - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetPreferredLocation, - i)); - HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(int), - hipMemRangeAttributePreferredLocation, - Hmm, MEM_SIZE)); - if (dev != i) { - WARN("hipMemAdviseSetPreferredLocation flag did not take affect "); - WARN("despite setting it using hipMemAdvise()\n"); - WARN("line no.: " << __LINE__); - IfTestPassed = false; - break; - } - - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetAccessedBy, i)); - dev = A_CONST; - HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(int), - hipMemRangeAttributeAccessedBy, Hmm, - MEM_SIZE)); - if (dev != i) { - WARN("hipMemAdviseSetAccessedBy flag did not take affect despite "); - WARN("setting it using hipMemAdvise(). 
line no.: " << __LINE__); - IfTestPassed = false; - break; - } - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseUnsetAccessedBy, i)); - } - HIP_CHECK(hipFree(Hmm)); - REQUIRE(IfTestPassed); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); +TEST_CASE("Unit_hipMemAdvise_Rounding") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); + return; } + supported_devices.push_back(hipCpuDeviceId); + const auto device = supported_devices.front(); + + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, 3 * kPageSize); + REQUIRE_FALSE(reinterpret_cast(alloc.ptr()) % kPageSize); + const auto [offset, width] = + GENERATE_COPY(std::make_pair(kPageSize / 4, kPageSize / 2), // Within page + std::make_pair(kPageSize / 2, kPageSize), // Across page border + std::make_pair(kPageSize / 2, kPageSize * 2)); // Across two page borders + HIP_CHECK(hipMemAdvise(alloc.ptr() + offset, width, hipMemAdviseSetAccessedBy, device)); + constexpr auto RoundDown = [](const intptr_t a, const intptr_t n) { return a - a % n; }; + constexpr auto RoundUp = [RoundDown](const intptr_t a, const intptr_t n) { + return RoundDown(a + n - 1, n); + }; + const auto base = alloc.ptr(); + const auto rounded_up = RoundUp(offset + width, kPageSize); + unsigned int attribute = 0; + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), hipMemRangeAttributeAccessedBy, + reinterpret_cast(base), rounded_up)); + REQUIRE(device == attribute); + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), hipMemRangeAttributeAccessedBy, + alloc.ptr(), 3 * kPageSize)); + REQUIRE((rounded_up == 3 * kPageSize ? 
device : hipInvalidDeviceId) == attribute); } - -// The following function tests if peers can set hipMemAdviseSetAccessedBy flag -// on HMM memory prefetched on each of the other gpus -#if HT_AMD -TEST_CASE("Unit_hipMemAdvise_TstAccessedByPeer") { - int MangdMem = HmmAttrPrint(); - if (MangdMem == 1) { - bool IfTestPassed = true; - int *Hmm = nullptr, MEM_SIZE = 4*4096, A_CONST = 9999;; - int NumDevs = 0, CanAccessPeer = A_CONST, flag = 0; - - HIP_CHECK(hipGetDeviceCount(&NumDevs)); - if (NumDevs < 2) { - SUCCEED("Test TestSetAccessedByPeer() need atleast two Gpus to test" - " the scenario. This system has GPUs less than 2"); - } - HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE, hipMemAttachGlobal)); - for (int i = 0; i < NumDevs; ++i) { - HIP_CHECK(hipMemPrefetchAsync(Hmm, MEM_SIZE, i, 0)); - for (int j = 0; j < NumDevs; ++j) { - if (i == j) - continue; - HIP_CHECK(hipSetDevice(j)); - HIP_CHECK(hipDeviceCanAccessPeer(&CanAccessPeer, j, i)); - if (CanAccessPeer) { - HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetAccessedBy, j)); - for (uint64_t m = 0; m < (MEM_SIZE/sizeof(int)); ++m) { - Hmm[m] = 4; - } - HIP_CHECK(hipDeviceEnablePeerAccess(i, 0)); - MemAdvseKernel<<<(MEM_SIZE/sizeof(int)/32), 32>>>( - (MEM_SIZE/sizeof(int)), Hmm); - HIP_CHECK(hipDeviceSynchronize()); - // Verifying the result - for (uint64_t m = 0; m < (MEM_SIZE/sizeof(int)); ++m) { - if (Hmm[m] != 16) { - flag = 1; - } - } - if (flag) { - WARN("Didnt get Expected results with device: " << j); - WARN("line no.: " << __LINE__); - IfTestPassed = false; - flag = 0; - } - } - } - } - HIP_CHECK(hipFree(Hmm)); - REQUIRE(IfTestPassed); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. 
Hence skipping the testing with Pass result.\n"); +TEST_CASE("Unit_hipMemAdvise_Flags_Do_Not_Cause_Prefetch") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); } -} + supported_devices.push_back(hipCpuDeviceId); + + const auto Test = [](const int device, const hipMemoryAdvise advice) { + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, advice, device)); + int32_t attribute = 0; + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), + hipMemRangeAttributeLastPrefetchLocation, alloc.ptr(), + kPageSize)); + REQUIRE(attribute == hipInvalidDeviceId); + }; + const auto device = + GENERATE_COPY(from_range(std::begin(supported_devices), std::end(supported_devices))); + + SECTION("hipMemAdviseSetPreferredLocation") { Test(device, hipMemAdviseSetPreferredLocation); } +// Disabled on AMD due to defect - EXSWHTEC-132 +#if HT_NVIDIA + SECTION("hipMemAdviseSetAccessedBy") { Test(device, hipMemAdviseSetAccessedBy); } #endif - - -/* Set AccessedBy flag and check value returned by hipMemRangeGetAttribute() - It should be -2(same is observed on cuda)*/ -TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, data = 999; - HIP_CHECK(hipMallocManaged(&Hmm, 2*4096)); - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetAccessedBy, 0)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributeLastPrefetchLocation, - Hmm, 2*4096)); - if (data != -2) { - WARN("Didnt get expected value!!\n"); - REQUIRE(false); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. 
Hence skipping the testing with Pass result.\n"); - } } -/* Set AccessedBy flag to device 0 on Hmm memory and prefetch the memory to - device 1, then probe for AccessedBy flag using hipMemRangeGetAttribute() - we should still see the said flag is set for device 0*/ -TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg2") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, data = 999, Ngpus = 0; - HIP_CHECK(hipGetDeviceCount(&Ngpus)); - if (Ngpus >= 2) { - hipStream_t strm; - HIP_CHECK(hipStreamCreate(&strm)); - HIP_CHECK(hipMallocManaged(&Hmm, 2*4096)); - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetAccessedBy, 0)); - HIP_CHECK(hipMemPrefetchAsync(Hmm, 2*4096, 1, strm)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributeAccessedBy, Hmm, 2*4096)); - if (data != 0) { - WARN("Didnt get expected behavior at line: " << __LINE__); - REQUIRE(false); - } - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseUnsetAccessedBy, 0)); - HIP_CHECK(hipStreamDestroy(strm)); - HIP_CHECK(hipFree(Hmm)); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. 
Hence skipping the testing with Pass result.\n"); - } -} - - - -/* 1) Set AccessedBy to device 0 followed by PreferredLocation to device 1 - check for AccessedBy flag using hipMemRangeGetAttribute() it should - return 0 - 2) Unset AccessedBy to 0 and set it to device 1 followed by - PreferredLocation to device 1, check for AccessedBy flag using - hipMemRangeGetAttribute() it should return 1*/ - -TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg3") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, data = 999, Ngpus = 0; - HIP_CHECK(hipGetDeviceCount(&Ngpus)); - if (Ngpus >= 2) { - HIP_CHECK(hipMallocManaged(&Hmm, 2*4096)); - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetAccessedBy, 0)); - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetPreferredLocation, 1)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributeAccessedBy, Hmm, 2*4096)); - if (data != 0) { - WARN("Didnt get expected behavior at line: " << __LINE__); - REQUIRE(false); - } - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseUnsetAccessedBy, 0)); - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetAccessedBy, 1)); - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetPreferredLocation, 0)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributeAccessedBy, Hmm, 2*4096)); - if (data != 1) { - WARN("Didnt get expected behavior at line: " << __LINE__); - REQUIRE(false); - } - HIP_CHECK(hipFree(Hmm)); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); +TEST_CASE("Unit_hipMemAdvise_Read_Write_After_Advise") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); } -} - - -/* Set AccessedBy flag to HMM memory launch a kernel and then unset - AccessedBy, launch kernel. 
We should not have any access issues*/ - -TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg4") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, NumElms = (1024 * 1024), InitVal = 123, blockSize = 64; - int DataMismatch = 0; - hipStream_t strm; - HIP_CHECK(hipStreamCreate(&strm)); - HIP_CHECK(hipMallocManaged(&Hmm, (NumElms * sizeof(int)))); - HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), - hipMemAdviseSetAccessedBy, 0)); - // Initializing memory - for (int i = 0; i < NumElms; ++i) { - Hmm[i] = InitVal; - } - dim3 dimBlock(blockSize, 1, 1); - dim3 dimGrid((NumElms + blockSize -1)/blockSize, 1, 1); - // launching kernel from each one of the gpus - MemAdvise2<<>>(Hmm, NumElms); - HIP_CHECK(hipStreamSynchronize(strm)); - - // verifying the final result - for (int i = 0; i < NumElms; ++i) { - if (Hmm[i] != (InitVal + 10)) { - DataMismatch++; - } - } - - if (DataMismatch != 0) { - WARN("DataMismatch is observed at line: " << __LINE__); - REQUIRE(false); - } + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); + constexpr size_t count = kPageSize / sizeof(*alloc.ptr()); + + const auto ReadWriteManagedMemory = [&](const int device, const hipMemoryAdvise advice) { + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, advice, device)); + + std::fill_n(alloc.ptr(), count, -1); + ArrayFindIfNot(alloc.ptr(), -1, count); + for (int i = 0; i < supported_devices.size(); ++i) { + HIP_CHECK(hipSetDevice(supported_devices[i])); + VectorIncrement<<>>(alloc.ptr(), 1, count); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + ArrayFindIfNot(alloc.ptr(), i, count); + } + + int32_t attribute = 0; + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), GetMemAdviceAttr(advice), + alloc.ptr(), kPageSize)); + REQUIRE((advice == hipMemAdviseSetReadMostly ? 
1 : device) == attribute); + }; + +// Disabled on AMD due to defect - EXSWHTEC-133 +#if HT_NVIDIA + SECTION("ReadMostly") { ReadWriteManagedMemory(hipInvalidDeviceId, hipMemAdviseSetReadMostly); } +#endif + supported_devices.push_back(hipCpuDeviceId); - HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), - hipMemAdviseUnsetAccessedBy, 0)); - MemAdvise2<<>>(Hmm, NumElms); - HIP_CHECK(hipStreamSynchronize(strm)); - // verifying the final result - for (int i = 0; i < NumElms; ++i) { - if (Hmm[i] != (InitVal + (2*10))) { - DataMismatch++; - } - } + const auto device = + GENERATE_COPY(from_range(std::begin(supported_devices), std::end(supported_devices))); + supported_devices.pop_back(); + SECTION("PreferredLocation") { ReadWriteManagedMemory(device, hipMemAdviseSetPreferredLocation); } - if (DataMismatch != 0) { - WARN("DataMismatch is observed at line: " << __LINE__); - REQUIRE(false); - } - HIP_CHECK(hipFree(Hmm)); - HIP_CHECK(hipStreamDestroy(strm)); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); - } +// Disabled on AMD due to defect - EXSWHTEC-132 +#if HT_NVIDIA + SECTION("AccessedBy") { ReadWriteManagedMemory(device, hipMemAdviseSetAccessedBy); } +#endif } - -/* Allocate memory using aligned_alloc(), assign PreferredLocation flag to - the allocated memory and launch a kernel. 
Kernel should get executed - successfully without hang or segfault*/ -#if __linux__ && HT_AMD -TEST_CASE("Unit_hipMemAdvise_TstAlignedAllocMem") { - if ((setenv("HSA_XNACK", "1", 1)) != 0) { - WARN("Unable to turn on HSA_XNACK, hence terminating the Test case!"); - REQUIRE(false); +TEST_CASE("Unit_hipMemAdvise_Prefetch_After_Advise") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); } - // The following code block checks for gfx90a so as to skip if the device is not MI200 - - hipDeviceProp_t prop; - int device; - HIP_CHECK(hipGetDevice(&device)); - HIP_CHECK(hipGetDeviceProperties(&prop, device)); - std::string gfxName(prop.gcnArchName); - - if ((gfxName == "gfx90a" || gfxName.find("gfx90a:")) == 0) { - int stat = 0; - if (fork() == 0) { - // The below part should be inside fork - int managedMem = 0, pageMemAccess = 0; - HIP_CHECK(hipDeviceGetAttribute(&pageMemAccess, - hipDeviceAttributePageableMemoryAccess, 0)); - WARN("hipDeviceAttributePageableMemoryAccess:" << pageMemAccess); - - HIP_CHECK(hipDeviceGetAttribute(&managedMem, hipDeviceAttributeManagedMemory, 0)); - WARN("hipDeviceAttributeManagedMemory: " << managedMem); - if ((managedMem == 1) && (pageMemAccess == 1)) { - int *Mllc = nullptr, MemSz = 4096 * 4, NumElms = 4096, InitVal = 123; - // Mllc = reinterpret_cast<(int *)>(aligned_alloc(4096, MemSz)); - Mllc = reinterpret_cast(aligned_alloc(4096, 4096*4)); - for (int i = 0; i < NumElms; ++i) { - Mllc[i] = InitVal; - } - hipStream_t strm; - int DataMismatch = 0; - HIP_CHECK(hipStreamCreate(&strm)); - // The following hipMemAdvise() call is made to know if advise on - // aligned_alloc() is causing any issue - HIP_CHECK(hipMemAdvise(Mllc, MemSz, hipMemAdviseSetPreferredLocation, 0)); - HIP_CHECK(hipMemPrefetchAsync(Mllc, MemSz, 0, strm)); - HIP_CHECK(hipStreamSynchronize(strm)); - MemAdvise2<<<(NumElms/32), 32, 0, strm>>>(Mllc, 
NumElms); - HIP_CHECK(hipStreamSynchronize(strm)); - for (int i = 0; i < NumElms; ++i) { - if (Mllc[i] != (InitVal + 10)) { - DataMismatch++; - } - } - if (DataMismatch != 0) { - WARN("DataMismatch observed!!"); - exit(9); // 9 for failure - } else { - exit(10); // 10 for Pass result - } - } else { - SUCCEED("GPU 0 doesn't support ManagedMemory with hipDeviceAttributePageableMemoryAccess " - "attribute. Hence skipping the testing with Pass result.\n"); - exit(Catch::ResultDisposition::ContinueOnFailure); - } - } else { - wait(&stat); - int Result = WEXITSTATUS(stat); - if (Result == Catch::ResultDisposition::ContinueOnFailure) { - WARN("GPU 0 doesn't support ManagedMemory with hipDeviceAttributePageableMemoryAccess " - "attribute. Hence skipping the testing with Pass result.\n"); - } else { - if (Result != 10) { - REQUIRE(false); - } - } - } - } else { - SUCCEED("Memory model feature is only supported for gfx90a, Hence" - "skipping the testcase for this GPU " << device); - WARN("Memory model feature is only supported for gfx90a, Hence" - "skipping the testcase for this GPU " << device); + supported_devices.push_back(hipCpuDeviceId); + const auto advice = GENERATE(hipMemAdviseSetReadMostly, hipMemAdviseSetPreferredLocation + // Skipped due to defect - EXSWHTEC - 132 + // hipMemAdviseSetAccessedBy + ); + const auto device = GENERATE_COPY(from_range(supported_devices)); + + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, advice, device)); + + for (const auto d : supported_devices) { + HIP_CHECK(hipMemPrefetchAsync(alloc.ptr(), kPageSize, d)); + HIP_CHECK(hipStreamSynchronize(nullptr)); + int32_t attribute = 0; + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), + hipMemRangeAttributeLastPrefetchLocation, alloc.ptr(), + kPageSize)); + REQUIRE(d == attribute); } + int32_t attribute = 0; + HIP_CHECK(hipMemRangeGetAttribute(&attribute, sizeof(attribute), GetMemAdviceAttr(advice), + 
alloc.ptr(), kPageSize)); + REQUIRE((advice == hipMemAdviseSetReadMostly ? 1 : device) == attribute); } -#endif -/* Allocate Hmm memory, set advise to PreferredLocation and then get - attribute using the api hipMemRangeGetAttribute() for - hipMemRangeAttributeLastPrefetchLocation the value returned should be -2*/ - -TEST_CASE("Unit_hipMemAdvise_TstMemAdvisePrefrdLoc") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, data = 999; - HIP_CHECK(hipMallocManaged(&Hmm, 4096)); - HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetPreferredLocation, 0)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributeLastPrefetchLocation, - Hmm, 4096)); - if (data != -2) { - WARN("Didnt receive expected value."); - REQUIRE(false); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); +TEST_CASE("Unit_hipMemAdvise_AccessedBy_All_Devices") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); + return; } -} - + supported_devices.push_back(hipCpuDeviceId); -/* Allocate HMM memory, set PreferredLocation to device 0, Prfetch the mem - to device1, probe for hipMemRangeAttributeLastPrefetchLocation using - hipMemRangeGetAttribute(), we should get 1*/ - -TEST_CASE("Unit_hipMemAdvise_TstMemAdviseLstPreftchLoc") { - int NumDevs = 0; - HIP_CHECK(hipGetDeviceCount(&NumDevs)); - if (NumDevs >= 2) { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, data = 999; - hipStream_t strm; - HIP_CHECK(hipSetDevice(1)); - HIP_CHECK(hipStreamCreate(&strm)); - HIP_CHECK(hipMallocManaged(&Hmm, 4096)); - HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetPreferredLocation, 0)); - HIP_CHECK(hipMemPrefetchAsync(Hmm, 4096, 1, strm)); - HIP_CHECK(hipStreamSynchronize(strm)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - 
hipMemRangeAttributeLastPrefetchLocation, - Hmm, 4096)); - if (data != 1) { - WARN("Didnt receive expected value!!"); - REQUIRE(false); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); - } - } else { - SUCCEED("This system has less than 2 gpus hence skipping the test.\n"); + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); + for (const auto device : supported_devices) { + HIP_CHECK(hipMemAdvise(alloc.ptr(), kPageSize, hipMemAdviseSetAccessedBy, device)); } + std::vector accessed_by(supported_devices.size(), hipInvalidDeviceId); + HIP_CHECK(hipMemRangeGetAttribute(accessed_by.data(), sizeof(accessed_by.data()), + hipMemRangeAttributeAccessedBy, alloc.ptr(), kPageSize)); + REQUIRE_THAT(accessed_by, Catch::Matchers::Equals(supported_devices)); } - -/* Allocate HMM memory, set ReadMostly followed by PreferredLocation, probe - for hipMemRangeAttributeReadMostly and hipMemRangeAttributePreferredLocation - using hipMemRangeGetAttribute() we should observe 1 and 0 correspondingly. 
- In other words setting of hipMemRangeAttributePreferredLocation should not - impact hipMemRangeAttributeReadMostly advise to the memory*/ - -TEST_CASE("Unit_hipMemAdvise_TstMemAdviseMultiFlag") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, data = 999; - HIP_CHECK(hipMallocManaged(&Hmm, 4096)); - HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetReadMostly, 0)); - HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetPreferredLocation, 0)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributeReadMostly, Hmm, - 4096)); - if (data != 1) { - WARN("Didnt receive expected value at line: " << data); - REQUIRE(false); - } - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributePreferredLocation, Hmm, - 4096)); - if (data != 0) { - WARN("Didnt receive expected value at line: " << data); - REQUIRE(false); - } - HIP_CHECK(hipFree(Hmm)); - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); +TEST_CASE("Unit_hipMemAdvise_Negative_Parameters") { + auto supported_devices = GetDevicesWithAdviseSupport(); + if (supported_devices.empty()) { + HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory"); } -} + const auto device = supported_devices.front(); + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, kPageSize); - -/*Allocate Hmm memory, advise it to ReadMostly for gpu: 0 and launch kernel - on all other gpus except 0. This test case may discover any effect or - access denial case arising due to setting ReadMostly only to a particular - gpu*/ - -TEST_CASE("Unit_hipMemAdvise_ReadMosltyMgpuTst") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int Ngpus = 0; - HIP_CHECK(hipGetDeviceCount(&Ngpus)); - if (Ngpus < 2) { - SUCCEED("This test needs atleast two gpus to run." 
- "Hence skipping the test.\n"); - } - int *Hmm = NULL, NumElms = (1024 * 1024), InitVal = 123, blockSize = 64; - int *Hmm1 = NULL, DataMismatch = 0; - hipStream_t strm; - HIP_CHECK(hipStreamCreate(&strm)); - HIP_CHECK(hipMallocManaged(&Hmm, (NumElms * sizeof(int)))); - // Initializing memory - for (int i = 0; i < NumElms; ++i) { - Hmm[i] = InitVal; - } - HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), - hipMemAdviseSetReadMostly, 0)); - dim3 dimBlock(blockSize, 1, 1); - dim3 dimGrid((NumElms + blockSize -1)/blockSize, 1, 1); +// Disabled on NVIDIA due to defect - EXSWHTEC-122 #if HT_AMD - SECTION("Launch Kernel on all other gpus") { - // launching kernel from each one of the gpus - for (int i = 1; i < Ngpus; ++i) { - DataMismatch = 0; - HIP_CHECK(hipSetDevice(i)); - HIP_CHECK(hipMallocManaged(&Hmm1, (NumElms * sizeof(int)))); - MemAdvise3<<>>(Hmm, Hmm1, NumElms); - HIP_CHECK(hipStreamSynchronize(strm)); - // verifying the results - for (int j = 0; j < NumElms; ++j) { - if (Hmm1[j] != (InitVal + 10)) { - DataMismatch++; - } - } - if (DataMismatch != 0) { - WARN("DataMismatch is observed with the gpu: " << i); - REQUIRE(false); - } - HIP_CHECK(hipFree(Hmm1)); - } - } - - SECTION("Launch Kernel on all other gpus and manipulate the content") { - for (int i = 0; i < Ngpus; ++i) { - DataMismatch = 0; - HIP_CHECK(hipSetDevice(i)); - HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), - hipMemAdviseSetReadMostly, i)); - MemAdvise2<<>>(Hmm, NumElms); - HIP_CHECK(hipStreamSynchronize(strm)); - } - // verifying the final result - for (int i = 0; i < NumElms; ++i) { - if (Hmm[i] != (InitVal + Ngpus * 10)) { - DataMismatch++; - } - } - - if (DataMismatch != 0) { - WARN("DataMismatch is observed at line: " << __LINE__); - REQUIRE(false); - } - } + SECTION("Invalid advice") { + HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), kPageSize, static_cast(-1), device), + hipErrorInvalidValue); + } #endif - HIP_CHECK(hipFree(Hmm)); - HIP_CHECK(hipStreamDestroy(strm)); - } else { - 
SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); + + const auto advice = GENERATE(hipMemAdviseSetAccessedBy, hipMemAdviseSetReadMostly, + hipMemAdviseSetPreferredLocation); + SECTION("count == 0") { + HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), 0, advice, device), hipErrorInvalidValue); } -} +// Disabled due to defect - EXSWHTEC-131 +#if HT_NVIDIA + SECTION("count larger than allocation size") { + HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), kPageSize + 1, advice, device), hipErrorInvalidValue); + } +#endif -TEST_CASE("Unit_hipMemAdvise_TstSetUnsetPrfrdLoc") { - int managed = HmmAttrPrint(); - if (managed == 1) { - int *Hmm = NULL, data = 999; - HIP_CHECK(hipMallocManaged(&Hmm, 2*4096)); - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseSetPreferredLocation, 0)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributePreferredLocation, Hmm, 2*4096)); - if (data != 0) { - WARN("Didnt receive expected value!!"); - REQUIRE(false); - } - HIP_CHECK(hipMemAdvise(Hmm, 2*4096, hipMemAdviseUnsetPreferredLocation, 0)); - HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), - hipMemRangeAttributePreferredLocation, Hmm, 2*4096)); - if (data != -2) { - WARN("Didnt receive expected value!!"); - REQUIRE(false); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. 
Hence skipping the testing with Pass result.\n"); + SECTION("dev_ptr == nullptr") { + HIP_CHECK_ERROR(hipMemAdvise(nullptr, kPageSize, advice, device), hipErrorInvalidValue); + } + + SECTION("dev_ptr pointing to non-managed memory") { + LinearAllocGuard alloc(LinearAllocs::hipMalloc, kPageSize); + HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), kPageSize, advice, device), hipErrorInvalidValue); } -} +// Disabled on AMD due to defect - EXSWHTEC-130 +#if HT_NVIDIA + SECTION("Invalid device") { + HIP_CHECK_ERROR(hipMemAdvise(alloc.ptr(), kPageSize, advice, hipInvalidDeviceId), + (advice == hipMemAdviseSetReadMostly ? hipSuccess : hipErrorInvalidDevice)); + } +#endif +} \ No newline at end of file diff --git a/tests/catch/unit/memory/hipMemAdvise_old.cc b/tests/catch/unit/memory/hipMemAdvise_old.cc new file mode 100644 index 0000000000..23e2a43ad8 --- /dev/null +++ b/tests/catch/unit/memory/hipMemAdvise_old.cc @@ -0,0 +1,924 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* Test Case Description: + Scenario-1: The following Function Tests the working of flags which can be + assigned to HMM memory using hipMemAdvise() api + Scenario-2: Negative tests on hipMemAdvise() api + Scenario-3: The following function tests various scenarios around the flag + 'hipMemAdviseSetPreferredLocation' using HMM memory and hipMemAdvise() api + Scenario-4: The following function tests various scenarios around the flag + 'hipMemAdviseSetReadMostly' using HMM memory and hipMemAdvise() api + Scenario-5: The following function verifies if assigning of a flag + invalidates the earlier flag which was assigned to the same memory region + using hipMemAdvise() + Scenario-6: The following function tests if peers can set + hipMemAdviseSetAccessedBy flag + on HMM memory prefetched on each of the other gpus + Scenario-7: Set AccessedBy flag and check value returned by + hipMemRangeGetAttribute() It should be -2(same is observed on cuda) + Scenario-8: Set AccessedBy flag to device 0 on Hmm memory and prefetch the + memory to device 1, then probe for AccessedBy flag using + hipMemRangeGetAttribute() we should still see the said flag is set for + device 0 + Scenario-9: 1) Set AccessedBy to device 0 followed by PreferredLocation to + device 1 check for AccessedBy flag using hipMemRangeGetAttribute() it should + return 0 + 2) Unset AccessedBy to 0 and set it to device 1 followed by + PreferredLocation to device 1, check for AccessedBy flag using + hipMemRangeGetAttribute() it should return 1 + Scenario-10: Set AccessedBy flag to HMM memory launch a kernel and then unset + AccessedBy, launch kernel. 
We should not have any access issues + Scenario-11: Allocate memory using aligned_alloc(), assign PreferredLocation + flag to the allocated memory and launch a kernel. Kernel should get executed + successfully without hang or segfault + Scenario-12: Allocate Hmm memory, set advise to PreferredLocation and then + get attribute using the api hipMemRangeGetAttribute() for + hipMemRangeAttributeLastPrefetchLocation the value returned should be -2 + Scenario-13: Allocate HMM memory, set PreferredLocation to device 0, Prefetch + the mem to device1, probe for hipMemRangeAttributeLastPrefetchLocation using + hipMemRangeGetAttribute(), we should get 1 + Scenario-14: Allocate HMM memory, set ReadMostly followed by + PreferredLocation, probe for hipMemRangeAttributeReadMostly and + hipMemRangeAttributePreferredLocation + using hipMemRangeGetAttribute() we should observe 1 and 0 correspondingly. + In other words setting of hipMemRangeAttributePreferredLocation should not + impact hipMemRangeAttributeReadMostly advise to the memory + Scenario-15: Allocate Hmm memory, advise it to ReadMostly for gpu: 0 and + launch kernel on all other gpus except 0. 
This test case may discover any
+   effect or access denial case arising due to setting ReadMostly only to a
+   particular gpu
+*/
+
+#include <hip_test_common.hh>
+#if __linux__
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#endif
+
+// Kernel function
+__global__ void MemAdvseKernel(int n, int* x) {
+  int index = blockIdx.x * blockDim.x + threadIdx.x;
+  if (index < n) x[index] = x[index] * x[index];
+}
+
+// Kernel
+__global__ void MemAdvise2(int* Hmm, int n) {
+  int index = blockIdx.x * blockDim.x + threadIdx.x;
+  int stride = blockDim.x * gridDim.x;
+  for (int i = index; i < n; i += stride) {
+    Hmm[i] = Hmm[i] + 10;
+  }
+}
+
+// Kernel
+__global__ void MemAdvise3(int* Hmm, int* Hmm1, int n) {
+  int index = blockIdx.x * blockDim.x + threadIdx.x;
+  int stride = blockDim.x * gridDim.x;
+  for (int i = index; i < n; i += stride) {
+    Hmm1[i] = Hmm[i] + 10;
+  }
+}
+
+
+static bool CheckError(hipError_t err, int LineNo) {
+  if (err == hipSuccess) {
+    WARN("Error expected but received hipSuccess at line no.:" << LineNo);
+    return false;
+  } else {
+    return true;
+  }
+}
+
+static int HmmAttrPrint() {
+  int managed = 0;
+  WARN(
+      "The following are the attribute values related to HMM for"
+      " device 0:\n");
+  HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeDirectManagedMemAccessFromHost, 0));
+  WARN("hipDeviceAttributeDirectManagedMemAccessFromHost: " << managed);
+  HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeConcurrentManagedAccess, 0));
+  WARN("hipDeviceAttributeConcurrentManagedAccess: " << managed);
+  HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributePageableMemoryAccess, 0));
+  WARN("hipDeviceAttributePageableMemoryAccess: " << managed);
+  HIP_CHECK(
+      hipDeviceGetAttribute(&managed, hipDeviceAttributePageableMemoryAccessUsesHostPageTables, 0));
+  WARN("hipDeviceAttributePageableMemoryAccessUsesHostPageTables:" << managed);
+
+  HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0));
+  WARN("hipDeviceAttributeManagedMemory: " << managed);
+
return managed; +} + + +// The following Function Tests the working of flags which can be assigned +// to HMM memory using hipMemAdvise() api +TEST_CASE("Unit_hipMemAdvise_TstFlags") { + int MangdMem = HmmAttrPrint(); + if (MangdMem == 1) { + bool IfTestPassed = true; + int NumDevs = 0, *Outpt = nullptr; + int MEM_SIZE = 4 * 1024, A_CONST = 9999; + float* Hmm = nullptr; + int AttrVal = 0; + HIP_CHECK(hipGetDeviceCount(&NumDevs)); + Outpt = new int[NumDevs]; + HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE * 2, hipMemAttachGlobal)); + // With the following for loop we iterate through each of the Gpus in the + // system set and unset the flags and check the behavior. + for (int i = 0; i < NumDevs; ++i) { + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE * 2, hipMemAdviseSetReadMostly, i)); + HIP_CHECK(hipMemRangeGetAttribute(&AttrVal, sizeof(AttrVal), hipMemRangeAttributeReadMostly, + Hmm, MEM_SIZE * 2)); + if (AttrVal != 1) { + WARN("Attempt to set hipMemAdviseSetReadMostly flag failed!\n"); + IfTestPassed = false; + } + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE * 2, hipMemAdviseUnsetReadMostly, i)); + + HIP_CHECK(hipMemRangeGetAttribute(&AttrVal, sizeof(AttrVal), hipMemRangeAttributeReadMostly, + Hmm, (MEM_SIZE * 2))); + if (AttrVal != 0) { + WARN("Attempt to Unset hipMemAdviseSetReadMostly flag failed!\n"); + IfTestPassed = false; + } + AttrVal = A_CONST; + // Currently hipMemAdviseSetPreferredLocation and + // hipMemAdviseSetAccessedBy + // flags are resulting in issues: SWDEV-267357 + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE * 2, hipMemAdviseSetPreferredLocation, i)); + HIP_CHECK(hipMemRangeGetAttribute( + &AttrVal, sizeof(AttrVal), hipMemRangeAttributePreferredLocation, Hmm, (MEM_SIZE * 2))); + if (AttrVal != i) { + WARN("Attempt to set hipMemAdviseSetPreferredLocation flag failed!\n"); + IfTestPassed = false; + } + AttrVal = A_CONST; + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE * 2, hipMemAdviseUnsetPreferredLocation, i)); + HIP_CHECK(hipMemRangeGetAttribute( + &AttrVal, sizeof(AttrVal), 
hipMemRangeAttributePreferredLocation, Hmm, (MEM_SIZE * 2))); + if (AttrVal == i) { + WARN("Attempt to Unset hipMemAdviseUnsetPreferredLocation "); + WARN("flag failed!\n"); + IfTestPassed = false; + } + for (int m = 0; m < NumDevs; ++m) { + Outpt[m] = A_CONST; + } + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE * 2, hipMemAdviseSetAccessedBy, i)); + HIP_CHECK(hipMemRangeGetAttribute(Outpt, sizeof(Outpt), hipMemRangeAttributeAccessedBy, Hmm, + (MEM_SIZE * 2))); + if ((Outpt[0]) != i) { + WARN("Attempt to set hipMemAdviseSetAccessedBy flag failed!\n"); + IfTestPassed = false; + } + for (int m = 0; m < NumDevs; ++m) { + Outpt[m] = A_CONST; + } + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE * 2, hipMemAdviseUnsetAccessedBy, i)); + HIP_CHECK(hipMemRangeGetAttribute(Outpt, sizeof(Outpt), hipMemRangeAttributeAccessedBy, Hmm, + (MEM_SIZE * 2))); + if ((Outpt[0]) >= 0) { + WARN("Attempt to Unset hipMemAdviseUnsetAccessedBy flag failed!\n"); + IfTestPassed = false; + } + } + delete[] Outpt; + HIP_CHECK(hipFree(Hmm)); + REQUIRE(IfTestPassed); + } else { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. 
Hence skipping the testing with Pass result.\n"); + } +} + +TEST_CASE("Unit_hipMemAdvise_NegtveTsts") { + int MangdMem = HmmAttrPrint(); + if (MangdMem == 1) { + bool IfTestPassed = true; + int NumDevs = 0, MEM_SIZE = 4 * 1024; + float* Hmm = nullptr; + std::string str; + HIP_CHECK(hipGetDeviceCount(&NumDevs)); + HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE * 2, hipMemAttachGlobal)); +#if HT_AMD + // Passing invalid value(99) device param + IfTestPassed &= + CheckError(hipMemAdvise(Hmm, MEM_SIZE * 2, hipMemAdviseSetReadMostly, 99), __LINE__); + + // Passing invalid value(-12) device param + IfTestPassed &= + CheckError(hipMemAdvise(Hmm, MEM_SIZE * 2, hipMemAdviseSetReadMostly, -12), __LINE__); +#endif + // Passing NULL as first parameter instead of valid pointer to a memory + IfTestPassed &= + CheckError(hipMemAdvise(NULL, MEM_SIZE * 2, hipMemAdviseSetReadMostly, 0), __LINE__); + + // Passing 0 for count(2nd param) parameter + IfTestPassed &= CheckError(hipMemAdvise(Hmm, 0, hipMemAdviseSetReadMostly, 0), __LINE__); + + // Passing count much more than actually allocated value + IfTestPassed &= + CheckError(hipMemAdvise(Hmm, MEM_SIZE * 6, hipMemAdviseSetReadMostly, 0), __LINE__); + + REQUIRE(IfTestPassed); + } else { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. 
Hence skipping the testing with Pass result.\n");
+  }
+}
+
+// The following function tests various scenarios around the flag
+// 'hipMemAdviseSetPreferredLocation' using HMM memory and hipMemAdvise() api
+TEST_CASE("Unit_hipMemAdvise_PrefrdLoc") {
+  int MangdMem = HmmAttrPrint();
+  if (MangdMem == 1) {
+    // Check that when a page fault occurs for the memory region set to devPtr,
+    // the data is migrated to the destn processor
+    int MEM_SIZE = 4096, A_CONST = 9999;
+    int *Hmm = nullptr, NumDevs = 0, dev = A_CONST;
+    bool IfTestPassed = true;
+    HIP_CHECK(hipGetDeviceCount(&NumDevs));
+    HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE * 3, hipMemAttachGlobal));
+    for (int i = 0; i < ((MEM_SIZE * 3) / 4); ++i) {
+      Hmm[i] = 4;
+    }
+    for (int devId = 0; devId < NumDevs; ++devId) {
+      HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE * 3, hipMemAdviseSetPreferredLocation, devId));
+      int NumElms = ((MEM_SIZE * 3) / 4);
+      MemAdvseKernel<<<(NumElms / 32), 32>>>(NumElms, Hmm);
+      int dev = A_CONST;
+      HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(dev), hipMemRangeAttributePreferredLocation,
+                                        Hmm, MEM_SIZE * 3));
+      if (dev != devId) {
+        WARN("Memory observed to be not available on expected location\n");
+        WARN("line no: " << __LINE__);
+        WARN("dev: " << dev);
+        IfTestPassed = false;
+      }
+    }
+
+    // Check that when preferred location is set for a memory region,
+    // data can still be prefetched using hipMemPrefetchAsync
+    hipStream_t strm;
+    dev = A_CONST;
+    for (int devId = 0; devId < NumDevs; ++devId) {
+      HIP_CHECK(hipSetDevice(devId));
+      HIP_CHECK(hipStreamCreate(&strm));
+      HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE * 3, hipMemAdviseSetPreferredLocation, devId));
+      HIP_CHECK(hipMemPrefetchAsync(Hmm, MEM_SIZE * 3, devId, strm));
+      HIP_CHECK(hipStreamSynchronize(strm));
+      HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(dev), hipMemRangeAttributeLastPrefetchLocation,
+                                        Hmm, MEM_SIZE * 3));
+      if (dev != devId) {
+        WARN("Memory reported to be not available at the Prefetched ");
+        WARN("location with device id: " << devId);
+
WARN("line no: " << __LINE__); + WARN("dev: " << dev); + IfTestPassed = false; + } + HIP_CHECK(hipStreamDestroy(strm)); + } + HIP_CHECK(hipFree(Hmm)); + REQUIRE(IfTestPassed); + } else { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + +// The following function tests various scenarios around the flag +// 'hipMemAdviseSetReadMostly' using HMM memory and hipMemAdvise() api + +TEST_CASE("Unit_hipMemAdvise_ReadMostly") { + int MangdMem = HmmAttrPrint(); + if (MangdMem == 1) { + bool IfTestPassed = true; + int MEM_SIZE = 4096, A_CONST = 9999; + float* Hmm = nullptr; + HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE)); + for (uint64_t i = 0; i < (MEM_SIZE / sizeof(float)); ++i) { + Hmm[i] = A_CONST; + } + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetReadMostly, 0)); + // Checking if the data can be read after setting hipMemAdviseSetReadMostly + for (uint64_t i = 0; i < (MEM_SIZE / sizeof(float)); ++i) { + if (Hmm[i] != A_CONST) { + WARN("Didn't find expected value in Hmm memory after setting"); + WARN(" hipMemAdviseSetReadMostly flag line no.: " << __LINE__); + IfTestPassed = false; + } + } + + // Checking if the memory region can be modified + for (uint64_t i = 0; i < (MEM_SIZE / sizeof(float)); ++i) { + Hmm[i] = A_CONST; + } + + for (uint64_t i = 0; i < (MEM_SIZE / sizeof(float)); ++i) { + if (Hmm[i] != A_CONST) { + WARN("Didn't find expected value in Hmm memory after Modification\n"); + WARN("line no.: " << __LINE__); + IfTestPassed = false; + } + } + + int out = A_CONST; + HIP_CHECK(hipMemRangeGetAttribute(&out, 4, hipMemRangeAttributeReadMostly, Hmm, MEM_SIZE)); + if (out != 1) { + WARN("out value: " << out); + IfTestPassed = false; + } + // Checking the advise attribute after prefetch + HIP_CHECK(hipMemPrefetchAsync(Hmm, MEM_SIZE, 0, 0)); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK( + hipMemRangeGetAttribute(&out, sizeof(int), hipMemRangeAttributeReadMostly, Hmm, 
MEM_SIZE)); + if (out != 1) { + WARN("Attribute assigned to memory changed after calling "); + WARN("hipMemPrefetchAsync(). line no.: " << __LINE__); + WARN("out value: " << out); + IfTestPassed = false; + } + HIP_CHECK(hipFree(Hmm)); + REQUIRE(IfTestPassed); + } else { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + +// The following function verifies if assigning of a flag invalidates the +// earlier flag which was assigned to the same memory region using +// hipMemAdvise() +TEST_CASE("Unit_hipMemAdvise_TstFlgOverrideEffect") { + int MangdMem = HmmAttrPrint(); + if (MangdMem == 1) { + bool IfTestPassed = true; + int MEM_SIZE = 4 * 4096, A_CONST = 9999; + float* Hmm = nullptr; + int NumDevs = 0, dev = A_CONST; + + HIP_CHECK(hipGetDeviceCount(&NumDevs)); + HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE, hipMemAttachGlobal)); + for (int i = 0; i < NumDevs; ++i) { + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetReadMostly, i)); + HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(int), hipMemRangeAttributeReadMostly, Hmm, + MEM_SIZE)); + if (dev != 1) { + WARN("hipMemAdviseSetReadMostly flag did not take affect despite "); + WARN("setting it using hipMemAdvise(). 
line no.: " << __LINE__); + IfTestPassed = false; + break; + } + dev = A_CONST; + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetPreferredLocation, i)); + HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(int), hipMemRangeAttributePreferredLocation, + Hmm, MEM_SIZE)); + if (dev != i) { + WARN("hipMemAdviseSetPreferredLocation flag did not take affect "); + WARN("despite setting it using hipMemAdvise()\n"); + WARN("line no.: " << __LINE__); + IfTestPassed = false; + break; + } + + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetAccessedBy, i)); + dev = A_CONST; + HIP_CHECK(hipMemRangeGetAttribute(&dev, sizeof(int), hipMemRangeAttributeAccessedBy, Hmm, + MEM_SIZE)); + if (dev != i) { + WARN("hipMemAdviseSetAccessedBy flag did not take affect despite "); + WARN("setting it using hipMemAdvise(). line no.: " << __LINE__); + IfTestPassed = false; + break; + } + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseUnsetAccessedBy, i)); + } + HIP_CHECK(hipFree(Hmm)); + REQUIRE(IfTestPassed); + } else { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + + +// The following function tests if peers can set hipMemAdviseSetAccessedBy flag +// on HMM memory prefetched on each of the other gpus +#if HT_AMD +TEST_CASE("Unit_hipMemAdvise_TstAccessedByPeer") { + int MangdMem = HmmAttrPrint(); + if (MangdMem == 1) { + bool IfTestPassed = true; + int *Hmm = nullptr, MEM_SIZE = 4 * 4096, A_CONST = 9999; + ; + int NumDevs = 0, CanAccessPeer = A_CONST, flag = 0; + + HIP_CHECK(hipGetDeviceCount(&NumDevs)); + if (NumDevs < 2) { + SUCCEED( + "Test TestSetAccessedByPeer() need atleast two Gpus to test" + " the scenario. 
This system has GPUs less than 2"); + } + HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE, hipMemAttachGlobal)); + for (int i = 0; i < NumDevs; ++i) { + HIP_CHECK(hipMemPrefetchAsync(Hmm, MEM_SIZE, i, 0)); + for (int j = 0; j < NumDevs; ++j) { + if (i == j) continue; + HIP_CHECK(hipSetDevice(j)); + HIP_CHECK(hipDeviceCanAccessPeer(&CanAccessPeer, j, i)); + if (CanAccessPeer) { + HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetAccessedBy, j)); + for (uint64_t m = 0; m < (MEM_SIZE / sizeof(int)); ++m) { + Hmm[m] = 4; + } + HIP_CHECK(hipDeviceEnablePeerAccess(i, 0)); + MemAdvseKernel<<<(MEM_SIZE / sizeof(int) / 32), 32>>>((MEM_SIZE / sizeof(int)), Hmm); + HIP_CHECK(hipDeviceSynchronize()); + // Verifying the result + for (uint64_t m = 0; m < (MEM_SIZE / sizeof(int)); ++m) { + if (Hmm[m] != 16) { + flag = 1; + } + } + if (flag) { + WARN("Didnt get Expected results with device: " << j); + WARN("line no.: " << __LINE__); + IfTestPassed = false; + flag = 0; + } + } + } + } + HIP_CHECK(hipFree(Hmm)); + REQUIRE(IfTestPassed); + } else { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} +#endif + + +/* Set AccessedBy flag and check value returned by hipMemRangeGetAttribute() + It should be -2(same is observed on cuda)*/ +TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, data = 999; + HIP_CHECK(hipMallocManaged(&Hmm, 2 * 4096)); + HIP_CHECK(hipMemAdvise(Hmm, 2 * 4096, hipMemAdviseSetAccessedBy, 0)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), hipMemRangeAttributeLastPrefetchLocation, + Hmm, 2 * 4096)); + if (data != -2) { + WARN("Didnt get expected value!!\n"); + REQUIRE(false); + } + } else { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. 
Hence skipping the testing with Pass result.\n"); + } +} + +/* Set AccessedBy flag to device 0 on Hmm memory and prefetch the memory to + device 1, then probe for AccessedBy flag using hipMemRangeGetAttribute() + we should still see the said flag is set for device 0*/ +TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg2") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, data = 999, Ngpus = 0; + HIP_CHECK(hipGetDeviceCount(&Ngpus)); + if (Ngpus >= 2) { + hipStream_t strm; + HIP_CHECK(hipStreamCreate(&strm)); + HIP_CHECK(hipMallocManaged(&Hmm, 2 * 4096)); + HIP_CHECK(hipMemAdvise(Hmm, 2 * 4096, hipMemAdviseSetAccessedBy, 0)); + HIP_CHECK(hipMemPrefetchAsync(Hmm, 2 * 4096, 1, strm)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), hipMemRangeAttributeAccessedBy, Hmm, + 2 * 4096)); + if (data != 0) { + WARN("Didnt get expected behavior at line: " << __LINE__); + REQUIRE(false); + } + HIP_CHECK(hipMemAdvise(Hmm, 2 * 4096, hipMemAdviseUnsetAccessedBy, 0)); + HIP_CHECK(hipStreamDestroy(strm)); + HIP_CHECK(hipFree(Hmm)); + } + } else { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. 
Hence skipping the testing with Pass result.\n"); + } +} + + +/* 1) Set AccessedBy to device 0 followed by PreferredLocation to device 1 + check for AccessedBy flag using hipMemRangeGetAttribute() it should + return 0 + 2) Unset AccessedBy to 0 and set it to device 1 followed by + PreferredLocation to device 1, check for AccessedBy flag using + hipMemRangeGetAttribute() it should return 1*/ + +TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg3") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, data = 999, Ngpus = 0; + HIP_CHECK(hipGetDeviceCount(&Ngpus)); + if (Ngpus >= 2) { + HIP_CHECK(hipMallocManaged(&Hmm, 2 * 4096)); + HIP_CHECK(hipMemAdvise(Hmm, 2 * 4096, hipMemAdviseSetAccessedBy, 0)); + HIP_CHECK(hipMemAdvise(Hmm, 2 * 4096, hipMemAdviseSetPreferredLocation, 1)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), hipMemRangeAttributeAccessedBy, Hmm, + 2 * 4096)); + if (data != 0) { + WARN("Didnt get expected behavior at line: " << __LINE__); + REQUIRE(false); + } + HIP_CHECK(hipMemAdvise(Hmm, 2 * 4096, hipMemAdviseUnsetAccessedBy, 0)); + HIP_CHECK(hipMemAdvise(Hmm, 2 * 4096, hipMemAdviseSetAccessedBy, 1)); + HIP_CHECK(hipMemAdvise(Hmm, 2 * 4096, hipMemAdviseSetPreferredLocation, 0)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), hipMemRangeAttributeAccessedBy, Hmm, + 2 * 4096)); + if (data != 1) { + WARN("Didnt get expected behavior at line: " << __LINE__); + REQUIRE(false); + } + HIP_CHECK(hipFree(Hmm)); + } + } else { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + + +/* Set AccessedBy flag to HMM memory launch a kernel and then unset + AccessedBy, launch kernel. 
We should not have any access issues*/
+
+TEST_CASE("Unit_hipMemAdvise_TstAccessedByFlg4") {
+  int managed = HmmAttrPrint();
+  if (managed == 1) {
+    int *Hmm = NULL, NumElms = (1024 * 1024), InitVal = 123, blockSize = 64;
+    int DataMismatch = 0;
+    hipStream_t strm;
+    HIP_CHECK(hipStreamCreate(&strm));
+    HIP_CHECK(hipMallocManaged(&Hmm, (NumElms * sizeof(int))));
+    HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), hipMemAdviseSetAccessedBy, 0));
+    // Initializing memory
+    for (int i = 0; i < NumElms; ++i) {
+      Hmm[i] = InitVal;
+    }
+    dim3 dimBlock(blockSize, 1, 1);
+    dim3 dimGrid((NumElms + blockSize - 1) / blockSize, 1, 1);
+    // launching kernel from each one of the gpus
+    MemAdvise2<<<dimGrid, dimBlock, 0, strm>>>(Hmm, NumElms);
+    HIP_CHECK(hipStreamSynchronize(strm));
+
+    // verifying the final result
+    for (int i = 0; i < NumElms; ++i) {
+      if (Hmm[i] != (InitVal + 10)) {
+        DataMismatch++;
+      }
+    }
+
+    if (DataMismatch != 0) {
+      WARN("DataMismatch is observed at line: " << __LINE__);
+      REQUIRE(false);
+    }
+
+    HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), hipMemAdviseUnsetAccessedBy, 0));
+    MemAdvise2<<<dimGrid, dimBlock, 0, strm>>>(Hmm, NumElms);
+    HIP_CHECK(hipStreamSynchronize(strm));
+    // verifying the final result
+    for (int i = 0; i < NumElms; ++i) {
+      if (Hmm[i] != (InitVal + (2 * 10))) {
+        DataMismatch++;
+      }
+    }
+
+    if (DataMismatch != 0) {
+      WARN("DataMismatch is observed at line: " << __LINE__);
+      REQUIRE(false);
+    }
+    HIP_CHECK(hipFree(Hmm));
+    HIP_CHECK(hipStreamDestroy(strm));
+  } else {
+    SUCCEED(
+        "GPU 0 doesn't support hipDeviceAttributeManagedMemory "
+        "attribute. Hence skipping the testing with Pass result.\n");
+  }
+}
+
+
+/* Allocate memory using aligned_alloc(), assign PreferredLocation flag to
+   the allocated memory and launch a kernel.
Kernel should get executed
+   successfully without hang or segfault*/
+#if __linux__ && HT_AMD
+TEST_CASE("Unit_hipMemAdvise_TstAlignedAllocMem") {
+  if ((setenv("HSA_XNACK", "1", 1)) != 0) {
+    WARN("Unable to turn on HSA_XNACK, hence terminating the Test case!");
+    REQUIRE(false);
+  }
+  // The following code block checks for gfx90a so as to skip if the device is not MI200
+
+  hipDeviceProp_t prop;
+  int device;
+  HIP_CHECK(hipGetDevice(&device));
+  HIP_CHECK(hipGetDeviceProperties(&prop, device));
+  std::string gfxName(prop.gcnArchName);
+
+  if ((gfxName == "gfx90a" || gfxName.find("gfx90a:")) == 0) {
+    int stat = 0;
+    if (fork() == 0) {
+      // The below part should be inside fork
+      int managedMem = 0, pageMemAccess = 0;
+      HIP_CHECK(hipDeviceGetAttribute(&pageMemAccess, hipDeviceAttributePageableMemoryAccess, 0));
+      WARN("hipDeviceAttributePageableMemoryAccess:" << pageMemAccess);
+
+      HIP_CHECK(hipDeviceGetAttribute(&managedMem, hipDeviceAttributeManagedMemory, 0));
+      WARN("hipDeviceAttributeManagedMemory: " << managedMem);
+      if ((managedMem == 1) && (pageMemAccess == 1)) {
+        int *Mllc = nullptr, MemSz = 4096 * 4, NumElms = 4096, InitVal = 123;
+        // Mllc = reinterpret_cast<(int *)>(aligned_alloc(4096, MemSz));
+        Mllc = reinterpret_cast<int *>(aligned_alloc(4096, 4096 * 4));
+        for (int i = 0; i < NumElms; ++i) {
+          Mllc[i] = InitVal;
+        }
+        hipStream_t strm;
+        int DataMismatch = 0;
+        HIP_CHECK(hipStreamCreate(&strm));
+        // The following hipMemAdvise() call is made to know if advise on
+        // aligned_alloc() is causing any issue
+        HIP_CHECK(hipMemAdvise(Mllc, MemSz, hipMemAdviseSetPreferredLocation, 0));
+        HIP_CHECK(hipMemPrefetchAsync(Mllc, MemSz, 0, strm));
+        HIP_CHECK(hipStreamSynchronize(strm));
+        MemAdvise2<<<(NumElms / 32), 32, 0, strm>>>(Mllc, NumElms);
+        HIP_CHECK(hipStreamSynchronize(strm));
+        for (int i = 0; i < NumElms; ++i) {
+          if (Mllc[i] != (InitVal + 10)) {
+            DataMismatch++;
+          }
+        }
+        if (DataMismatch != 0) {
+          WARN("DataMismatch observed!!");
+          exit(9);  // 9 for
failure + } else { + exit(10); // 10 for Pass result + } + } else { + SUCCEED( + "GPU 0 doesn't support ManagedMemory with hipDeviceAttributePageableMemoryAccess " + "attribute. Hence skipping the testing with Pass result.\n"); + exit(Catch::ResultDisposition::ContinueOnFailure); + } + } else { + wait(&stat); + int Result = WEXITSTATUS(stat); + if (Result == Catch::ResultDisposition::ContinueOnFailure) { + WARN( + "GPU 0 doesn't support ManagedMemory with hipDeviceAttributePageableMemoryAccess " + "attribute. Hence skipping the testing with Pass result.\n"); + } else { + if (Result != 10) { + REQUIRE(false); + } + } + } + } else { + SUCCEED( + "Memory model feature is only supported for gfx90a, Hence" + "skipping the testcase for this GPU " + << device); + WARN( + "Memory model feature is only supported for gfx90a, Hence" + "skipping the testcase for this GPU " + << device); + } +} +#endif + +/* Allocate Hmm memory, set advise to PreferredLocation and then get + attribute using the api hipMemRangeGetAttribute() for + hipMemRangeAttributeLastPrefetchLocation the value returned should be -2*/ + +TEST_CASE("Unit_hipMemAdvise_TstMemAdvisePrefrdLoc") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, data = 999; + HIP_CHECK(hipMallocManaged(&Hmm, 4096)); + HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetPreferredLocation, 0)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), hipMemRangeAttributeLastPrefetchLocation, + Hmm, 4096)); + if (data != -2) { + WARN("Didnt receive expected value."); + REQUIRE(false); + } + } else { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. 
Hence skipping the testing with Pass result.\n"); + } +} + + +/* Allocate HMM memory, set PreferredLocation to device 0, Prfetch the mem + to device1, probe for hipMemRangeAttributeLastPrefetchLocation using + hipMemRangeGetAttribute(), we should get 1*/ + +TEST_CASE("Unit_hipMemAdvise_TstMemAdviseLstPreftchLoc") { + int NumDevs = 0; + HIP_CHECK(hipGetDeviceCount(&NumDevs)); + if (NumDevs >= 2) { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, data = 999; + hipStream_t strm; + HIP_CHECK(hipSetDevice(1)); + HIP_CHECK(hipStreamCreate(&strm)); + HIP_CHECK(hipMallocManaged(&Hmm, 4096)); + HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetPreferredLocation, 0)); + HIP_CHECK(hipMemPrefetchAsync(Hmm, 4096, 1, strm)); + HIP_CHECK(hipStreamSynchronize(strm)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), + hipMemRangeAttributeLastPrefetchLocation, Hmm, 4096)); + if (data != 1) { + WARN("Didnt receive expected value!!"); + REQUIRE(false); + } + } else { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } + } else { + SUCCEED("This system has less than 2 gpus hence skipping the test.\n"); + } +} + + +/* Allocate HMM memory, set ReadMostly followed by PreferredLocation, probe + for hipMemRangeAttributeReadMostly and hipMemRangeAttributePreferredLocation + using hipMemRangeGetAttribute() we should observe 1 and 0 correspondingly. 
+ In other words setting of hipMemRangeAttributePreferredLocation should not + impact hipMemRangeAttributeReadMostly advise to the memory*/ + +TEST_CASE("Unit_hipMemAdvise_TstMemAdviseMultiFlag") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, data = 999; + HIP_CHECK(hipMallocManaged(&Hmm, 4096)); + HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetReadMostly, 0)); + HIP_CHECK(hipMemAdvise(Hmm, 4096, hipMemAdviseSetPreferredLocation, 0)); + HIP_CHECK( + hipMemRangeGetAttribute(&data, sizeof(int), hipMemRangeAttributeReadMostly, Hmm, 4096)); + if (data != 1) { + WARN("Didnt receive expected value at line: " << data); + REQUIRE(false); + } + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), hipMemRangeAttributePreferredLocation, + Hmm, 4096)); + if (data != 0) { + WARN("Didnt receive expected value at line: " << data); + REQUIRE(false); + } + HIP_CHECK(hipFree(Hmm)); + } else { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +} + + +/*Allocate Hmm memory, advise it to ReadMostly for gpu: 0 and launch kernel + on all other gpus except 0. This test case may discover any effect or + access denial case arising due to setting ReadMostly only to a particular + gpu*/ + +TEST_CASE("Unit_hipMemAdvise_ReadMosltyMgpuTst") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int Ngpus = 0; + HIP_CHECK(hipGetDeviceCount(&Ngpus)); + if (Ngpus < 2) { + SUCCEED( + "This test needs atleast two gpus to run." 
+ "Hence skipping the test.\n"); + } + int *Hmm = NULL, NumElms = (1024 * 1024), InitVal = 123, blockSize = 64; + int *Hmm1 = NULL, DataMismatch = 0; + hipStream_t strm; + HIP_CHECK(hipStreamCreate(&strm)); + HIP_CHECK(hipMallocManaged(&Hmm, (NumElms * sizeof(int)))); + // Initializing memory + for (int i = 0; i < NumElms; ++i) { + Hmm[i] = InitVal; + } + HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), hipMemAdviseSetReadMostly, 0)); + dim3 dimBlock(blockSize, 1, 1); + dim3 dimGrid((NumElms + blockSize - 1) / blockSize, 1, 1); +#if HT_AMD + SECTION("Launch Kernel on all other gpus") { + // launching kernel from each one of the gpus + for (int i = 1; i < Ngpus; ++i) { + DataMismatch = 0; + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipMallocManaged(&Hmm1, (NumElms * sizeof(int)))); + MemAdvise3<<>>(Hmm, Hmm1, NumElms); + HIP_CHECK(hipStreamSynchronize(strm)); + // verifying the results + for (int j = 0; j < NumElms; ++j) { + if (Hmm1[j] != (InitVal + 10)) { + DataMismatch++; + } + } + if (DataMismatch != 0) { + WARN("DataMismatch is observed with the gpu: " << i); + REQUIRE(false); + } + HIP_CHECK(hipFree(Hmm1)); + } + } + + SECTION("Launch Kernel on all other gpus and manipulate the content") { + for (int i = 0; i < Ngpus; ++i) { + DataMismatch = 0; + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), hipMemAdviseSetReadMostly, i)); + MemAdvise2<<>>(Hmm, NumElms); + HIP_CHECK(hipStreamSynchronize(strm)); + } + // verifying the final result + for (int i = 0; i < NumElms; ++i) { + if (Hmm[i] != (InitVal + Ngpus * 10)) { + DataMismatch++; + } + } + + if (DataMismatch != 0) { + WARN("DataMismatch is observed at line: " << __LINE__); + REQUIRE(false); + } + } +#endif + HIP_CHECK(hipFree(Hmm)); + HIP_CHECK(hipStreamDestroy(strm)); + } else { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. 
Hence skipping the testing with Pass result.\n"); + } +} + + +TEST_CASE("Unit_hipMemAdvise_TstSetUnsetPrfrdLoc") { + int managed = HmmAttrPrint(); + if (managed == 1) { + int *Hmm = NULL, data = 999; + HIP_CHECK(hipMallocManaged(&Hmm, 2 * 4096)); + HIP_CHECK(hipMemAdvise(Hmm, 2 * 4096, hipMemAdviseSetPreferredLocation, 0)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), hipMemRangeAttributePreferredLocation, + Hmm, 2 * 4096)); + if (data != 0) { + WARN("Didnt receive expected value!!"); + REQUIRE(false); + } + HIP_CHECK(hipMemAdvise(Hmm, 2 * 4096, hipMemAdviseUnsetPreferredLocation, 0)); + HIP_CHECK(hipMemRangeGetAttribute(&data, sizeof(int), hipMemRangeAttributePreferredLocation, + Hmm, 2 * 4096)); + if (data != -2) { + WARN("Didnt receive expected value!!"); + REQUIRE(false); + } + } else { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory " + "attribute. Hence skipping the testing with Pass result.\n"); + } +}