diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index 4c3ade5cf1d29..8112276642dfa 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -813,6 +813,16 @@ public: const ext::oneapi::experimental::image_mem_handle Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &ImageDesc); + + // Device to device copy with offsets and extent + void ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent) }; class queue { @@ -954,6 +964,34 @@ public: ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &ImageDesc, const std::vector &DepEvents); + + // Device to device copy with offsets and extent + void ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent) + void ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent + event DepEvent) + void ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent + const std::vector &DepEvents) }; } ``` @@ -2903,4 +2941,6 @@ These features still need to be handled: |6.1|2024-09-09| - Update for image-array sub-region copy support. |6.2|2024-09-26| - Added addressing mode `ext_oneapi_clamp_to_border` value, equivalent to `clamp`, to match with external APIs. +|6.3|2024-10-02| - Add support for `image_mem_handle` to `image_mem_handle` + sub-region copies. |====================== diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index 777ddecd887d5..abb6c3c8f3240 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -1609,6 +1609,58 @@ inline event queue::ext_oneapi_copy( CodeLoc); } +inline event queue::ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset, + DestImgDesc, CopyExtent); + }, + CodeLoc); +} + +inline event queue::ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, event DepEvent, + const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvent); + CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset, + DestImgDesc, CopyExtent); + }, + CodeLoc); +} + +inline event queue::ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, const std::vector &DepEvents, + const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvents); + CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset, + DestImgDesc, CopyExtent); + }, + CodeLoc); +} + inline event queue::ext_oneapi_copy( const void *Src, sycl::range<3> SrcOffset, void *Dest, sycl::range<3> DestOffset, diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 272cd96ad2e3e..4e2e87cbe5a94 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -3143,7 +3143,7 @@ class __SYCL_EXPORT handler { /// incomplete. /// /// \param Src is an opaque image memory handle to the source memory. - /// \param SrcOffset is an offset from the origin of source measured in pixels + /// \param SrcOffset is an offset from the source origin measured in pixels /// (pixel size determined by \p SrcImgDesc ) /// \param SrcImgDesc is the source image descriptor /// \param Dest is a USM pointer to the destination memory. @@ -3189,6 +3189,32 @@ class __SYCL_EXPORT handler { ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &ImageDesc); + /// Copies data from device to device memory, where \p Src and \p Dest + /// are opaque image memory handles. Allows for a sub-region copy, where + /// \p SrcOffset, \p DestOffset and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p SrcImgDesc + /// An exception is thrown if either \p Src or \p Dest is incomplete. + /// + /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcOffset is an offset from the source origin measured in pixels + /// (pixel size determined by \p SrcImgDesc ) + /// \param SrcImgDesc is the source image descriptor + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestOffset is an offset from the destination origin measured in + /// pixels (pixel size determined by \p DestImgDesc ) + /// \param DestImgDesc is the destination image descriptor + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels (pixel size determined by + /// \p SrcImgDesc ) + void ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent); + /// Copies data from one memory region to another, where \p Src and \p Dest /// are USM pointers. Allows for a sub-region copy, where \p SrcOffset , /// \p DestOffset , and \p Extent are used to determine the sub-region. diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 39f69046ad2aa..8ce8eb357b4f9 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -1757,6 +1757,93 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { const ext::oneapi::experimental::image_descriptor &ImageDesc, const detail::code_location &CodeLoc = detail::code_location::current()); + /// Copies data from device to device memory, where \p Src and \p Dest + /// are opaque image memory handles. Allows for a sub-region copy, where + /// \p SrcOffset, \p DestOffset and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p SrcImgDesc + /// An exception is thrown if either \p Src or \p Dest is incomplete. + /// + /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcOffset is an offset from the origin of source measured in pixels + /// (pixel size determined by \p SrcImgDesc ) + /// \param SrcImgDesc is the source image descriptor + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestOffset is an offset from the origin of destination measured in + /// pixels (pixel size determined by \p DestImgDesc ) + /// \param DestImgDesc is the destination image descriptor + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels (pixel size determined by + /// \p SrcImgDesc ) + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device memory, where \p Src and \p Dest + /// are opaque image memory handles. Allows for a sub-region copy, where + /// \p SrcOffset, \p DestOffset and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p SrcImgDesc + /// An exception is thrown if either \p Src or \p Dest is incomplete. + /// + /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcOffset is an offset from the origin of source measured in pixels + /// (pixel size determined by \p SrcImgDesc ) + /// \param SrcImgDesc is the source image descriptor + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestOffset is an offset from the origin of destination measured in + /// pixels (pixel size determined by \p DestImgDesc ) + /// \param DestImgDesc is the destination image descriptor + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels (pixel size determined by + /// \p SrcImgDesc ) + /// \param DepEvent is an event that specifies the kernel dependencies. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, event DepEvent, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device memory, where \p Src and \p Dest + /// are opaque image memory handles. Allows for a sub-region copy, where + /// \p SrcOffset, \p DestOffset and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p SrcImgDesc + /// An exception is thrown if either \p Src or \p Dest is incomplete. + /// + /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcOffset is an offset from the origin of source measured in pixels + /// (pixel size determined by \p SrcImgDesc ) + /// \param srcImgDesc is the source image descriptor + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestOffset is an offset from the origin of destination measured in + /// pixels (pixel size determined by \p DestImgDesc ) + /// \param DestImgDesc is the destination image descriptor + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels (pixel size determined by + /// \p SrcImgDesc ) + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, const std::vector &DepEvents, + const detail::code_location &CodeLoc = detail::code_location::current()); + /// Copies data from one memory region to another, where \p Src and \p Dest /// are USM pointers. Allows for a sub-region copy, where \p SrcOffset , /// \p DestOffset , and \p Extent are used to determine the sub-region. diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 980eb8ee52301..e4c06ad02c1cf 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1248,6 +1248,105 @@ void handler::ext_oneapi_copy( setType(detail::CGType::CopyImage); } +void handler::ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_bindless_images>(); + SrcImgDesc.verify(); + DestImgDesc.verify(); + + auto isOutOfRange = [](const sycl::range<3> &range, + const sycl::range<3> &offset, + const sycl::range<3> ©Extent) { + sycl::range<3> result = (range > 0UL && ((offset + copyExtent) > range)); + + return (static_cast(result[0]) || static_cast(result[1]) || + static_cast(result[2])); + }; + + sycl::range<3> SrcImageSize = {SrcImgDesc.width, SrcImgDesc.height, + SrcImgDesc.depth}; + sycl::range<3> DestImageSize = {DestImgDesc.width, DestImgDesc.height, + DestImgDesc.depth}; + + if (isOutOfRange(SrcImageSize, SrcOffset, CopyExtent) || + isOutOfRange(DestImageSize, DestOffset, CopyExtent)) { + throw sycl::exception( + make_error_code(errc::invalid), + "Image copy attempted to access out of bounds memory!"); + } + + MSrcPtr = reinterpret_cast(Src.raw_handle); + MDstPtr = reinterpret_cast(Dest.raw_handle); + + ur_image_desc_t UrSrcDesc = {}; + UrSrcDesc.width = SrcImgDesc.width; + UrSrcDesc.height = SrcImgDesc.height; + UrSrcDesc.depth = SrcImgDesc.depth; + UrSrcDesc.arraySize = SrcImgDesc.array_size; + + ur_image_desc_t UrDestDesc = {}; + UrDestDesc.width = DestImgDesc.width; + UrDestDesc.height = DestImgDesc.height; + UrDestDesc.depth = DestImgDesc.depth; + UrDestDesc.arraySize = DestImgDesc.array_size; + + auto fill_image_type = + [](const ext::oneapi::experimental::image_descriptor &Desc, + ur_image_desc_t &UrDesc) { + if (Desc.array_size > 1) { + // Image Array. + UrDesc.type = Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY + : UR_MEM_TYPE_IMAGE1D_ARRAY; + + // Cubemap. + UrDesc.type = + Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap + ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP + : UrDesc.type; + } else { + UrDesc.type = Desc.depth > 0 + ? UR_MEM_TYPE_IMAGE3D + : (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D + : UR_MEM_TYPE_IMAGE1D); + } + }; + + fill_image_type(SrcImgDesc, UrSrcDesc); + fill_image_type(DestImgDesc, UrDestDesc); + + auto fill_format = [](const ext::oneapi::experimental::image_descriptor &Desc, + ur_image_format_t &UrFormat) { + UrFormat.channelType = + sycl::_V1::detail::convertChannelType(Desc.channel_type); + UrFormat.channelOrder = sycl::detail::convertChannelOrder( + sycl::_V1::ext::oneapi::experimental::detail:: + get_image_default_channel_order(Desc.num_channels)); + }; + + ur_image_format_t UrSrcFormat; + ur_image_format_t UrDestFormat; + + fill_format(SrcImgDesc, UrSrcFormat); + fill_format(DestImgDesc, UrDestFormat); + + impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; + impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; + impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; + impl->MSrcImageDesc = UrSrcDesc; + impl->MDstImageDesc = UrDestDesc; + impl->MSrcImageFormat = UrSrcFormat; + impl->MDstImageFormat = UrDestFormat; + impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE; + setType(detail::CGType::CopyImage); +} + void handler::ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, sycl::range<3> SrcOffset, diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy_1D_subregion.cpp b/sycl/test-e2e/bindless_images/device_to_device_copy_1D_subregion.cpp new file mode 100644 index 0000000000000..250195358011a --- /dev/null +++ b/sycl/test-e2e/bindless_images/device_to_device_copy_1D_subregion.cpp @@ -0,0 +1,172 @@ +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +namespace syclexp = sycl::ext::oneapi::experimental; + +void copy_image_mem_handle_to_image_mem_handle( + const syclexp::image_descriptor &dataInDesc, + const syclexp::image_descriptor &outDesc, + const std::vector &dataIn1, const std::vector &dataIn2, + sycl::device dev, sycl::queue q, std::vector &out) { + + // Check that output image is double size of input images + assert(outDesc.width == dataInDesc.width * 2); + + syclexp::image_mem imgMemSrc1(dataInDesc, dev, q.get_context()); + syclexp::image_mem imgMemSrc2(dataInDesc, dev, q.get_context()); + syclexp::image_mem imgMemDst(outDesc, dev, q.get_context()); + + // Copy input data to device + q.ext_oneapi_copy(dataIn1.data(), imgMemSrc1.get_handle(), dataInDesc); + q.ext_oneapi_copy(dataIn2.data(), imgMemSrc2.get_handle(), dataInDesc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {dataInDesc.width / 2, 1, 1}; + + // Copy first half of imgMemSrcOne to first quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc1.get_handle(), {0, 0, 0}, dataInDesc, + imgMemDst.get_handle(), {0, 0, 0}, outDesc, copyExtent); + + // Copy second half of imgMemSrcOne to second quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc1.get_handle(), {dataInDesc.width / 2, 0, 0}, + dataInDesc, imgMemDst.get_handle(), + {outDesc.width / 4, 0, 0}, outDesc, copyExtent); + + // Copy first half of imgMemSrcTwo to third quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc2.get_handle(), {0, 0, 0}, dataInDesc, + imgMemDst.get_handle(), {outDesc.width / 2, 0, 0}, outDesc, + copyExtent); + + // Copy second half of imgMemSrcTwo to fourth quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc2.get_handle(), {dataInDesc.width / 2, 0, 0}, + dataInDesc, imgMemDst.get_handle(), + {(outDesc.width / 4) * 3, 0, 0}, outDesc, copyExtent); + + q.wait_and_throw(); + + // Copy out data to host + q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), outDesc); + + q.wait_and_throw(); +} + +bool out_of_bounds_copy(const syclexp::image_descriptor &dataInDesc, + const syclexp::image_descriptor &outDesc, + const std::vector &dataIn, sycl::device dev, + sycl::queue q) { + + // Check that output image is double size of input images + assert(outDesc.width == dataInDesc.width * 2); + + syclexp::image_mem imgMemSrc(dataInDesc, dev, q.get_context()); + syclexp::image_mem imgMemDst(outDesc, dev, q.get_context()); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), dataInDesc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {(dataInDesc.width / 2) + 1, 1, 1}; + + try { + // Perform out of bound copy! + q.ext_oneapi_copy(imgMemSrc.get_handle(), {dataInDesc.width / 2, 0, 0}, + dataInDesc, imgMemDst.get_handle(), + {(outDesc.width / 4) * 3, 0, 0}, outDesc, copyExtent); + } catch (sycl::exception e) { + return true; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return false; + } + + return false; +} + +bool check_test(const std::vector &out, + const std::vector &expected) { + assert(out.size() == expected.size()); + bool validated = true; + for (int i = 0; i < out.size(); i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + return validated; +} + +template +bool run_copy_test(sycl::device &dev, sycl::queue &q, sycl::range<1> dims) { + std::vector dataIn1(dims.size() / 2); + std::vector dataIn2(dims.size() / 2); + std::vector out(dims.size()); + + std::vector expected(dims.size()); + + // Create two sets of input data. Each half the size of the output + // and one beginning sequentually after the other. + std::iota(dataIn1.begin(), dataIn1.end(), 0); + std::iota(dataIn2.begin(), dataIn2.end(), (dataIn2.size())); + + // Set expected to be sequential + std::iota(expected.begin(), expected.end(), 0); + + syclexp::image_descriptor outDesc = + syclexp::image_descriptor(dims, channelNum, channelType); + syclexp::image_descriptor dataInDesc = + syclexp::image_descriptor(dims / 2, channelNum, channelType); + + // Perform copy + copy_image_mem_handle_to_image_mem_handle(dataInDesc, outDesc, dataIn1, + dataIn2, dev, q, out); + + bool copyValidated = check_test(out, expected); + + bool exceptionValidated = + out_of_bounds_copy(dataInDesc, outDesc, dataIn1, dev, q); + + return copyValidated && exceptionValidated; +} + +int main() { + + sycl::device dev; + sycl::queue q(dev); + + bool validated = + run_copy_test<1, sycl::image_channel_type::fp32>(dev, q, {12}); + + if (!validated) { + std::cout << "Tests failed\n"; + return 1; + } + + std::cout << "Tests passed\n"; + + return 0; +} diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy_2D_subregion.cpp b/sycl/test-e2e/bindless_images/device_to_device_copy_2D_subregion.cpp new file mode 100644 index 0000000000000..0dea97a3f745e --- /dev/null +++ b/sycl/test-e2e/bindless_images/device_to_device_copy_2D_subregion.cpp @@ -0,0 +1,147 @@ +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +namespace syclexp = sycl::ext::oneapi::experimental; + +void copy_image_mem_handle_to_image_mem_handle( + const syclexp::image_descriptor &desc, const std::vector &dataIn, + sycl::device dev, sycl::queue q, std::vector &out) { + syclexp::image_mem imgMemSrc(desc, dev, q.get_context()); + syclexp::image_mem imgMemDst(desc, dev, q.get_context()); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), desc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + + // Copy four quarters of square into output image + q.ext_oneapi_copy(imgMemSrc.get_handle(), {0, 0, 0}, desc, + imgMemDst.get_handle(), {0, 0, 0}, desc, copyExtent); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), {desc.width / 2, 0, 0}, desc, + imgMemDst.get_handle(), {desc.width / 2, 0, 0}, desc, + copyExtent); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), {0, desc.height / 2, 0}, desc, + imgMemDst.get_handle(), {0, desc.height / 2, 0}, desc, + copyExtent); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), + {desc.width / 2, desc.height / 2, 0}, desc, + imgMemDst.get_handle(), + {desc.width / 2, desc.height / 2, 0}, desc, copyExtent); + + q.wait_and_throw(); + + // Copy out data to host + q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), desc); + + q.wait_and_throw(); +} + +bool out_of_bounds_copy(const syclexp::image_descriptor &desc, + const std::vector &dataIn, sycl::device dev, + sycl::queue q) { + syclexp::image_mem imgMemSrc(desc, dev, q.get_context()); + syclexp::image_mem imgMemDst(desc, dev, q.get_context()); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), desc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + + try { + // Perform out of bound copy! + q.ext_oneapi_copy( + imgMemSrc.get_handle(), {desc.width / 2, desc.height / 2, 0}, desc, + imgMemDst.get_handle(), {desc.width / 2, (desc.height / 2) + 1, 0}, + desc, copyExtent); + } catch (sycl::exception e) { + return true; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return false; + } + + return false; +} + +bool check_test(const std::vector &out, + const std::vector &expected) { + bool validated = true; + for (int i = 0; i < out.size(); i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + return validated; +} + +template +bool run_copy_test(sycl::device &dev, sycl::queue &q, sycl::range<2> dims) { + std::vector dataIn(dims.size()); + std::iota(dataIn.begin(), dataIn.end(), 0); + + std::vector expected(dims.size()); + std::iota(expected.begin(), expected.end(), 0); + + std::vector out(dims.size()); + + syclexp::image_descriptor desc = + syclexp::image_descriptor(dims, channelNum, channelType); + + // Perform copy + copy_image_mem_handle_to_image_mem_handle(desc, dataIn, dev, q, out); + + bool copyValidated = check_test(out, expected); + + bool exceptionValidated = out_of_bounds_copy(desc, dataIn, dev, q); + + return copyValidated && exceptionValidated; +} + +int main() { + + sycl::device dev; + sycl::queue q(dev); + + bool validated = + run_copy_test<1, sycl::image_channel_type::fp32>(dev, q, {12, 12}); + + if (!validated) { + std::cout << "Tests failed\n"; + return 1; + } + + std::cout << "Tests passed\n"; + + return 0; +} diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy_3D_subregion.cpp b/sycl/test-e2e/bindless_images/device_to_device_copy_3D_subregion.cpp new file mode 100644 index 0000000000000..680814bf6be77 --- /dev/null +++ b/sycl/test-e2e/bindless_images/device_to_device_copy_3D_subregion.cpp @@ -0,0 +1,166 @@ +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +namespace syclexp = sycl::ext::oneapi::experimental; + +void copy_image_mem_handle_to_image_mem_handle( + const syclexp::image_descriptor &desc, const std::vector &dataIn, + sycl::device dev, sycl::queue q, std::vector &out) { + syclexp::image_mem imgMemSrc(desc, dev, q.get_context()); + syclexp::image_mem imgMemDst(desc, dev, q.get_context()); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), desc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {desc.width / 2, desc.height / 2, desc.depth / 2}; + + // Copy eight quadrants of square into output image + q.ext_oneapi_copy(imgMemSrc.get_handle(), {0, 0, 0}, desc, + imgMemDst.get_handle(), {0, 0, 0}, desc, copyExtent); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), {desc.width / 2, 0, 0}, desc, + imgMemDst.get_handle(), {desc.width / 2, 0, 0}, desc, + copyExtent); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), {0, desc.height / 2, 0}, desc, + imgMemDst.get_handle(), {0, desc.height / 2, 0}, desc, + copyExtent); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), + {desc.width / 2, desc.height / 2, 0}, desc, + imgMemDst.get_handle(), + {desc.width / 2, desc.height / 2, 0}, desc, copyExtent); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), {0, 0, desc.depth / 2}, desc, + imgMemDst.get_handle(), {0, 0, desc.depth / 2}, desc, + copyExtent); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), {desc.width / 2, 0, desc.depth / 2}, + desc, imgMemDst.get_handle(), + {desc.width / 2, 0, desc.depth / 2}, desc, copyExtent); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), + {0, desc.height / 2, desc.depth / 2}, desc, + imgMemDst.get_handle(), + {0, desc.height / 2, desc.depth / 2}, desc, copyExtent); + + q.ext_oneapi_copy( + imgMemSrc.get_handle(), {desc.width / 2, desc.height / 2, desc.depth / 2}, + desc, imgMemDst.get_handle(), + {desc.width / 2, desc.height / 2, desc.depth / 2}, desc, copyExtent); + + q.wait_and_throw(); + + // Copy out data to host + q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), desc); + + q.wait_and_throw(); +} + +bool check_test(const std::vector &out, + const std::vector &expected) { + bool validated = true; + for (int i = 0; i < out.size(); i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + return validated; +} + +bool out_of_bounds_copy(const syclexp::image_descriptor &desc, + const std::vector &dataIn, sycl::device dev, + sycl::queue q) { + syclexp::image_mem imgMemSrc(desc, dev, q.get_context()); + syclexp::image_mem imgMemDst(desc, dev, q.get_context()); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), desc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {desc.width / 2, desc.height / 2, desc.depth / 2}; + + try { + // Perform out of bound copy! + q.ext_oneapi_copy(imgMemSrc.get_handle(), + {desc.width / 2, desc.height / 2, (desc.depth / 2) + 1}, + desc, imgMemDst.get_handle(), + {desc.width / 2, desc.height / 2, desc.depth / 2}, desc, + copyExtent); + } catch (sycl::exception e) { + return true; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return false; + } + + return false; +} + +template +bool run_copy_test(sycl::device &dev, sycl::queue &q, sycl::range<3> dims) { + std::vector dataIn(dims.size()); + std::iota(dataIn.begin(), dataIn.end(), 0); + + std::vector expected(dims.size()); + std::iota(expected.begin(), expected.end(), 0); + + std::vector out(dims.size()); + + syclexp::image_descriptor desc = + syclexp::image_descriptor(dims, channelNum, channelType); + + // Perform copy + copy_image_mem_handle_to_image_mem_handle(desc, dataIn, dev, q, out); + + bool copyValidated = check_test(out, expected); + + bool exceptionValidated = out_of_bounds_copy(desc, dataIn, dev, q); + + return copyValidated && exceptionValidated; +} + +int main() { + + sycl::device dev; + sycl::queue q(dev); + + bool validated = + run_copy_test<1, sycl::image_channel_type::fp32>(dev, q, {12, 12, 12}); + + if (!validated) { + std::cout << "Tests failed\n"; + return 1; + } + + std::cout << "Tests passed\n"; + + return 0; +} diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index bf5542036a2e2..4843224624b32 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3472,6 +3472,7 @@ _ZN4sycl3_V17handler13getKernelNameEv _ZN4sycl3_V17handler14addAccessorReqESt10shared_ptrINS0_6detail16AccessorImplHostEE _ZN4sycl3_V17handler14setNDRangeUsedEb _ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleENS0_5rangeILi3EEERKNS4_16image_descriptorEPvS7_S7_S7_ +_ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleENS0_5rangeILi3EEERKNS4_16image_descriptorES5_S7_SA_S7_ _ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleEPvRKNS4_16image_descriptorE _ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleES5_RKNS4_16image_descriptorE _ZN4sycl3_V17handler15ext_oneapi_copyEPKvNS0_3ext6oneapi12experimental16image_mem_handleERKNS6_16image_descriptorE @@ -3914,10 +3915,10 @@ _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info6device15backend_versionEEENS0_6 _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info6device7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info8platform7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv _ZNK4sycl3_V16kernel17get_kernel_bundleEv -_ZNK4sycl3_V16kernel3getEv +_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific19max_num_work_groupsEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi3EEEm _ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific23max_num_work_group_syncEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueE _ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific23max_num_work_group_syncEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi3EEEm -_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific19max_num_work_groupsEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi3EEEm +_ZNK4sycl3_V16kernel3getEv _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific15work_group_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific16global_work_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific16private_mem_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 17a1e6b9fc167..09967eee5939e 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3780,6 +3780,7 @@ ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@PEAXAEBUimage_descriptor@56723@@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@U456723@AEBUimage_descriptor@56723@@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@56723@PEAX111@Z +?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@56723@U456723@121@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXPEAXAEBUimage_descriptor@experimental@oneapi@ext@23@_KAEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXPEAXAEBUimage_descriptor@experimental@oneapi@ext@23@_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXPEAXAEBUimage_descriptor@experimental@oneapi@ext@23@_KV423@AEBUcode_location@detail@23@@Z @@ -3801,6 +3802,9 @@ ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX111AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX111AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX111V423@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@U567823@121AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@U567823@121AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@U567823@121V423@AEBUcode_location@detail@23@@Z ?ext_oneapi_disable_peer_access@device@_V1@sycl@@QEAAXAEBV123@@Z ?ext_oneapi_empty@queue@_V1@sycl@@QEBA_NXZ ?ext_oneapi_enable_peer_access@device@_V1@sycl@@QEAAXAEBV123@@Z @@ -4267,8 +4271,8 @@ ?update@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@@Z ?update@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBVnode@34567@@Z ?updateAccessor@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBVAccessorBaseHost@267@@Z -?updateValue@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBX_K@Z ?updateValue@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBVraw_kernel_arg@34567@_K@Z +?updateValue@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBX_K@Z ?use_kernel_bundle@handler@_V1@sycl@@QEAAXAEBV?$kernel_bundle@$01@23@@Z ?verifyDeviceHasProgressGuarantee@handler@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z ?verifyUsedKernelBundleInternal@handler@_V1@sycl@@AEAAXVstring_view@detail@23@@Z