diff --git a/.github/intel-llvm-mirror-base-commit b/.github/intel-llvm-mirror-base-commit index 2b187e4d5e..859d8cbd18 100644 --- a/.github/intel-llvm-mirror-base-commit +++ b/.github/intel-llvm-mirror-base-commit @@ -1 +1 @@ -f2ac846551b0f7fb97974a87c74f7a48bc4f43f8 +fa6bc3d3c3f2e91bb066e117a9ca2618e6df0631 diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp index 4d97b225cb..4f2a83d087 100644 --- a/source/adapters/cuda/image.cpp +++ b/source/adapters/cuda/image.cpp @@ -704,7 +704,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; - cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes; + cpy_desc.srcPitch = pSrcImageDesc->rowPitch; if (pDstImageDesc->rowPitch == 0) { cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; @@ -725,7 +725,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.srcHost = pSrc; - cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes; + cpy_desc.srcPitch = pSrcImageDesc->rowPitch; cpy_desc.srcHeight = pSrcImageDesc->height; cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; @@ -745,7 +745,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.srcHost = pSrc; - cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes; + cpy_desc.srcPitch = pSrcImageDesc->rowPitch; cpy_desc.srcHeight = std::max(uint64_t{1}, pSrcImageDesc->height); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; @@ -793,7 +793,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; - cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; + cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.dstHost = pDst; if (pSrcImageDesc->rowPitch == 0) { @@ -818,7 +818,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.srcArray = as_CUArray(pSrc); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.dstHost = pDst; - cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; + cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstHeight = pDstImageDesc->height; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; @@ -838,7 +838,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.srcArray = as_CUArray(pSrc); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.dstHost = pDst; - cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; + cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstHeight = std::max(uint64_t{1}, pDstImageDesc->height); cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height); diff --git a/source/adapters/hip/image.cpp b/source/adapters/hip/image.cpp index 4851b197d6..0874df4c8c 100644 --- a/source/adapters/hip/image.cpp +++ b/source/adapters/hip/image.cpp @@ -704,7 +704,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.srcY = pCopyRegion->srcOffset.y; cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; cpy_desc.dstY = pCopyRegion->dstOffset.y; - cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes; + cpy_desc.srcPitch = pSrcImageDesc->rowPitch; if (pDstImageDesc->rowPitch == 0) { cpy_desc.dstMemoryType = hipMemoryTypeArray; cpy_desc.dstArray = static_cast(pDst); @@ -727,7 +727,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = hipMemoryTypeHost; cpy_desc.srcHost = pSrc; - cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes; + cpy_desc.srcPitch = pSrcImageDesc->rowPitch; cpy_desc.srcHeight = pSrcImageDesc->height; cpy_desc.dstMemoryType = hipMemoryTypeArray; cpy_desc.dstArray = static_cast(pDst); @@ -749,7 +749,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = hipMemoryTypeHost; cpy_desc.srcHost = pSrc; - cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes; + cpy_desc.srcPitch = pSrcImageDesc->rowPitch; cpy_desc.srcHeight = std::max(MinCopyHeight, pSrcImageDesc->height); cpy_desc.dstMemoryType = hipMemoryTypeArray; cpy_desc.dstArray = static_cast(pDst); @@ -824,7 +824,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( } cpy_desc.dstMemoryType = hipMemoryTypeHost; cpy_desc.dstHost = pDst; - cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; + cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; UR_CHECK_ERROR(hipMemcpyParam2DAsync(&cpy_desc, Stream)); @@ -840,7 +840,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.srcArray = static_cast(const_cast(pSrc)); cpy_desc.dstMemoryType = hipMemoryTypeHost; cpy_desc.dstHost = pDst; - cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; + cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstHeight = pDstImageDesc->height; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; @@ -863,7 +863,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.srcArray = static_cast(const_cast(pSrc)); cpy_desc.dstMemoryType = hipMemoryTypeHost; cpy_desc.dstHost = pDst; - cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; + cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstHeight = std::max(MinCopyHeight, pDstImageDesc->height); cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = diff --git a/source/adapters/level_zero/image_common.cpp b/source/adapters/level_zero/image_common.cpp index b34a59ad5e..9b76788b6a 100644 --- a/source/adapters/level_zero/image_common.cpp +++ b/source/adapters/level_zero/image_common.cpp @@ -779,8 +779,7 @@ ur_result_t bindlessImagesHandleCopyFlags( switch (imageCopyFlags) { case UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE: { - uint32_t SrcRowPitch = - pSrcImageDesc->width * getPixelSizeBytes(pSrcImageFormat); + uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; if (pDstImageDesc->rowPitch == 0) { // Copy to Non-USM memory @@ -824,8 +823,7 @@ ur_result_t bindlessImagesHandleCopyFlags( return UR_RESULT_SUCCESS; }; case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST: { - uint32_t DstRowPitch = - pDstImageDesc->width * getPixelSizeBytes(pDstImageFormat); + uint32_t DstRowPitch = pDstImageDesc->rowPitch; uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; if (pSrcImageDesc->rowPitch == 0) { // Copy from Non-USM memory to host diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp index 6c6eb65c92..4ae3b1795b 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp @@ -25,12 +25,7 @@ namespace ur_sanitizer_layer { namespace asan { -AsanInterceptor::AsanInterceptor() { - if (getContext()->Options.MaxQuarantineSizeMB) { - m_Quarantine = std::make_unique( - getContext()->Options.MaxQuarantineSizeMB * 1024 * 1024); - } -} +AsanInterceptor::AsanInterceptor() {} AsanInterceptor::~AsanInterceptor() { // We must release these objects before releasing adapters, since @@ -39,7 +34,6 @@ AsanInterceptor::~AsanInterceptor() { DeviceInfo->Shadow = nullptr; } - m_Quarantine = nullptr; m_MemBufferMap.clear(); m_KernelMap.clear(); m_ContextMap.clear(); @@ -224,7 +218,7 @@ ur_result_t AsanInterceptor::releaseMemory(ur_context_handle_t Context, } // If quarantine is disabled, USM is freed immediately - if (!m_Quarantine) { + if (!ContextInfo->m_Quarantine) { UR_LOG_L(getContext()->logger, DEBUG, "Free: {}", (void *)AllocInfo->AllocBegin); @@ -239,7 +233,8 @@ ur_result_t AsanInterceptor::releaseMemory(ur_context_handle_t Context, } // If quarantine is enabled, cache it - auto ReleaseList = m_Quarantine->put(AllocInfo->Device, AllocInfoIt); + auto ReleaseList = + ContextInfo->m_Quarantine->put(AllocInfo->Device, AllocInfoIt); if (ReleaseList.size()) { std::scoped_lock Guard(m_AllocationMapMutex); for (auto &It : ReleaseList) { diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.hpp b/source/loader/layers/sanitizer/asan/asan_interceptor.hpp index 8190bd9232..a0c69fe038 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.hpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.hpp @@ -16,6 +16,7 @@ #include "asan_allocator.hpp" #include "asan_buffer.hpp" #include "asan_libdevice.hpp" +#include "asan_quarantine.hpp" #include "asan_shadow.hpp" #include "asan_statistics.hpp" #include "sanitizer_common/sanitizer_common.hpp" @@ -33,8 +34,6 @@ namespace ur_sanitizer_layer { namespace asan { -class Quarantine; - struct AllocInfoList { std::vector> List; ur_shared_mutex Mutex; @@ -148,12 +147,18 @@ struct ContextInfo { std::unordered_map> InternalQueueMap; + std::optional m_Quarantine; + AsanStatsWrapper Stats; explicit ContextInfo(ur_context_handle_t Context) : Handle(Context) { [[maybe_unused]] auto Result = getContext()->urDdiTable.Context.pfnRetain(Context); assert(Result == UR_RESULT_SUCCESS); + if (getContext()->Options.MaxQuarantineSizeMB) { + m_Quarantine.emplace(getContext()->Options.MaxQuarantineSizeMB * 1024 * + 1024); + } } ~ContextInfo(); @@ -404,8 +409,6 @@ class AsanInterceptor { AllocationMap m_AllocationMap; ur_shared_mutex m_AllocationMapMutex; - std::unique_ptr m_Quarantine; - std::unordered_set m_Adapters; ur_shared_mutex m_AdaptersMutex;