diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index 8112276642dfa..70624982d9780 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -2035,674 +2035,48 @@ void release_external_semaphore(external_semaphore semaphoreHandle, === 1D image read/write ```cpp -// Set up device, queue, and context -sycl::device device; -sycl::queue queue(device); -sycl::context context = queue.get_context(); - -// Initialize input data -constexpr size_t width = 512; -std::vector dataIn(width); -std::vector dataOut(width); -for (int i = 0; i < width; i++) { - dataIn[i] = static_cast(i); -} - -// Image descriptor - can use the same for both images -sycl::ext::oneapi::experimental::image_descriptor desc( - sycl::range{width}, 1, - sycl::ext::oneapi::experimental::image_channel_type::fp32); - -try { - // Extension: returns the device pointer to the allocated memory - sycl::ext::oneapi::experimental::image_mem imgMemoryIn(desc, queue); - sycl::ext::oneapi::experimental::image_mem imgMemoryOut(desc, queue); - - // Extension: create the image and return the handle - sycl::ext::oneapi::experimental::unsampled_image_handle imgIn = - sycl::ext::oneapi::experimental::create_image(imgMemoryIn, desc, queue); - sycl::ext::oneapi::experimental::unsampled_image_handle imgOut = - sycl::ext::oneapi::experimental::create_image(imgMemoryOut, desc, queue); - - // Extension: copy over data to device - q.ext_oneapi_copy(dataIn.data(), imgMemoryIn, desc); - - // Bindless images require manual synchronization - // Wait for copy operation to finish - q.wait_and_throw(); - - q.submit([&](sycl::handler &cgh) { - // No need to request access, handles captured by value - - cgh.parallel_for(width, [=](sycl::id<1> id) { - // Extension: read image data from handle - float pixel = sycl::ext::oneapi::experimental::fetch_image( - imgIn, int(id[0])); - - // Extension: write to image data using handle - sycl::ext::oneapi::experimental::write_image(imgOut, int(id[0]), pixel); - }); - }); - - // Using image handles requires manual synchronization - q.wait_and_throw(); - - // Copy data written to imgOut to host - q.ext_oneapi_copy(imgMemoryOut, dataOut.data(), desc); - - // Cleanup - sycl::ext::oneapi::experimental::destroy_image_handle(imgIn, queue); - sycl::ext::oneapi::experimental::destroy_image_handle(imgOut, queue); -} catch (sycl::exception e) { - std::cerr << "SYCL exception caught: " << e.what(); - exit(-1); -} +#include -// Validate that `dataIn` correctly transferred to `dataOut` -bool validated = (dataIn == dataOut); +include::../../../test-e2e/bindless_images/examples/example_1_1D_read_write.cpp[lines=9..-1] ``` === Reading from a dynamically sized array of 2D images ```cpp -// Set up device, queue, and context -sycl::device device; -sycl::queue queue(device); -sycl::context context = queue.get_context(); - -// declare image data -size_t numImages = 5; -size_t width = 8; -size_t height = 8; -size_t numPixels = width * height; -std::vector dataIn(numPixels); -std::vector dataOut(numPixels); -std::vector dataExpected(numPixels); -for (int i = 0; i < width; i++) { - for (int j = 0; j < height; j++) { - int index = j + (height * i); - dataIn[index] = index; - dataExpected[index] = index * numImages; - } -} - -// Image descriptor - can use the same for all images -sycl::ext::oneapi::experimental::image_descriptor desc( - {width, height}, 1, - sycl::ext::oneapi::experimental::image_channel_type::fp32); - -try { - - // Allocate each image and save the handles - std::vector imgAllocations; - for (int i = 0; i < numImages; i++) { - // Extension: move-construct device allocated memory - imgAllocations.emplace_back( - sycl::ext::oneapi::experimental::image_mem{desc, queue}); - } - - // Copy over data to device for each image - for (int i = 0; i < numImages; i++) { - // Extension: copy over data to device - q.ext_oneapi_copy(dataIn.data(), imgAllocations[i], desc); - } - - // Wait for copy operations to finish - q.wait_and_throw(); - - // Create the images and return the handles - std::vector - imgHandles; - for (int i = 0; i < numImages; i++) { - // Extension: create the image and return the handle - sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle = - sycl::ext::oneapi::experimental::create_image(imgAllocations[i], - desc, queue); - imgHandles.push_back(imgHandle); - } - - sycl::buffer outBuf{dataOut.data(), sycl::range{height, width}}; - sycl::buffer imgHandlesBuf{imgHandles.data(), sycl::range{numImages}}; - q.submit([&](sycl::handler &cgh) { - sycl::accessor outAcc{outBuf, cgh, sycl::write_only}; - sycl::accessor imgHandleAcc{imgHandlesBuf, cgh, sycl::read_only}; - - cgh.parallel_for( - sycl::nd_range<2>{{width, height}, {width, height}}, - [=](sycl::nd_item<2> it) { - size_t dim0 = it.get_local_id(0); - size_t dim1 = it.get_local_id(1); - - // Sum each image by reading via its handle - float sum = 0; - for (int i = 0; i < numImages; i++) { - // Extension: read image data from handle - sum += (sycl::ext::oneapi::experimental::fetch_image( - imgHandleAcc[i], sycl::vec(dim0, dim1))); - } - outAcc[sycl::id{dim1, dim0}] = sum; - }); - }); - - // Using image handles requires manual synchronization - q.wait_and_throw(); - - // Cleanup - for (int i = 0; i < numImages; i++) { - sycl::ext::oneapi::experimental::destroy_image_handle(imgHandles[i], queue); - } -} catch (sycl::exception e) { - std::cerr << "SYCL exception caught: " << e.what(); - exit(-1); -} +#include -// Validate that `dataOut` is correct -bool validated = (dataOut == dataExpected); +include::../../../test-e2e/bindless_images/examples/example_2_2D_dynamic_read.cpp[lines=9..-1] ``` === Reading a 1D mipmap with anisotropic filtering and levels ```cpp -// Set up device, queue, and context -sycl::device device; -sycl::queue queue(device); -sycl::context context = q.get_context(); - -// declare image data -constexpr size_t width = 16; -unsigned int num_levels = 2; -std::vector dataIn1(width); -std::vector dataIn2(width / 2); -std::vector dataOut(width); -std::vector dataExpected(width); -int j = 0; -for (int i = 0; i < width; i++) { - dataExpected[i] = static_cast(i + (j + 10)); - if (i % 2) - j++; - dataIn1[i] = static_cast(i); - if (i < (N / 2)) - dataIn2[i] = static_cast(i + 10); -} - -try { - - // Image descriptor -- number of levels - sycl::ext::oneapi::experimental::image_descriptor desc( - {width}, 1, - sycl::ext::oneapi::experimental::image_channel_type::fp32, - sycl::ext::oneapi::experimental::image_type::mipmap, num_levels); - - // Allocate the mipmap - sycl::ext::oneapi::experimental::image_mem mip_mem(desc, queue); - - // Retrieve level 0 - sycl::ext::oneapi::experimental::image_mem_handle img_mem1 = - mip_mem.get_mip_level_mem_handle(0) - - // Copy over data to level 0 - q.ext_oneapi_copy(dataIn1.data(), img_mem1, desc); - - // Copy over data to level 1 - q.ext_oneapi_copy(dataIn2.data(), mip_mem.get_mip_level_mem_handle(1), - desc.get_mip_level_desc(1)); - q.wait_and_throw(); - - // Extended sampler object to take in mipmap attributes - sycl::ext::oneapi::experimental::bindless_image_sampler samp( - addressing_mode::mirrored_repeat, - coordinate_normalization_mode::normalized, filtering_mode::nearest, - mipmap_filtering_mode::nearest, 0.0f, (float)num_levels, 8.0f); - - // Create a sampled image handle to represent the mipmap - sycl::ext::oneapi::experimental::sampled_image_handle mipHandle = - sycl::ext::oneapi::experimental::create_image(mip_mem, samp, desc, queue); - q.wait_and_throw(); - - sycl::buffer buf((float *)dataOut.data(), width); - q.submit([&](handler &cgh) { - auto outAcc = buf.get_access(cgh, width); - - cgh.parallel_for(width, [=](id<1> id) { - float sum = 0; - float x = (static_cast(id[0]) + 0.5f) / static_cast(width); - // Read mipmap level 0 with anisotropic filtering - // and level 1 with level filtering - float px1 = sycl::ext::oneapi::experimental::sample_mipmap( - mipHandle, x, 0.0f, 0.0f); - float px2 = sycl::ext::oneapi::experimental::sample_mipmap( - mipHandle, x, 1.0f); - - sum = px1 + px2; - outAcc[id] = sum; - }); - }); - - q.wait_and_throw(); - - // Cleanup - sycl::ext::oneapi::experimental::destroy_image_handle(mipHandle, queue); - -} catch (sycl::exception e) { - std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); -} catch (...) { - std::cerr << "Unknown exception caught!\n"; - exit(-1); -} +#include -// Validate that `dataOut` is correct -bool validated = (dataOut == dataExpected); +include::../../../test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp[lines=9..-1] ``` === 1D image array read/write ```cpp -using VecType = sycl::vec; - -sycl::device dev; -sycl::queue q(dev); -auto ctxt = q.get_context(); - -// declare image data -constexpr size_t width = 5; -constexpr size_t array_size = 2; -constexpr size_t N = width; -std::vector out(N * array_size); -std::vector expected(N * array_size); -std::vector outBuf(N); -std::vector dataIn1(N * array_size); -std::vector dataIn2(N * array_size); - -for (int i = 0; i < N * array_size; i++) { - // Populate input data (to-be image arrays) - dataIn1[i] = VecType(i); - dataIn2[i] = VecType(2*i); -} - -// Populate expected output -for (int i = 0; i < width; i++) { - for (int l = 0; l < array_size; l++) { - expected[l * N + i] = dataIn1[l * N + i][0] + dataIn2[l * N + i][0]; - } -} - -try { - // Extension: image descriptor -- number of layers - sycl::ext::oneapi::experimental::image_descriptor desc( - {width}, 4, sycl::image_channel_type::fp32, - sycl::ext::oneapi::experimental::image_type::array, 1, array_size); - - // Extension: allocate image array memory on device - sycl::ext::oneapi::experimental::image_mem arrayMem1(desc, dev, ctxt); - sycl::ext::oneapi::experimental::image_mem arrayMem2(desc, dev, ctxt); - sycl::ext::oneapi::experimental::image_mem outMem(desc, dev, ctxt); - - // Extension: copy over data to device - q.ext_oneapi_copy(dataIn1.data(), arrayMem1.get_handle(), desc); - q.ext_oneapi_copy(dataIn2.data(), arrayMem2.get_handle(), desc); - q.wait_and_throw(); - - // Extension: create a unsampled image handles to represent the image arrays - sycl::ext::oneapi::experimental::unsampled_image_handle arrayHandle1 = - sycl::ext::oneapi::experimental::create_image(arrayMem1, desc, dev, - ctxt); - sycl::ext::oneapi::experimental::unsampled_image_handle arrayHandle2 = - sycl::ext::oneapi::experimental::create_image(arrayMem2, desc, dev, - ctxt); - sycl::ext::oneapi::experimental::unsampled_image_handle outHandle = - sycl::ext::oneapi::experimental::create_image(outMem, desc, dev, - ctxt); - - q.submit([&](sycl::handler &cgh) { - - cgh.parallel_for(N, [=](sycl::id<1> id) { - float sum1 = 0; - float sum2 = 0; - - // Extension: read image layers 0 and 1 - VecType px1 = sycl::ext::oneapi::experimental::fetch_image_array( - arrayHandle1, int(id[0]), 0); - VecType px2 = sycl::ext::oneapi::experimental::fetch_image_array( - arrayHandle1, int(id[0]), 1); - - // Extension: read image layers 0 and 1 - VecType px3 = sycl::ext::oneapi::experimental::fetch_image_array( - arrayHandle2, int(id[0]), 0); - VecType px4 = sycl::ext::oneapi::experimental::fetch_image_array( - arrayHandle2, int(id[0]), 1); - - sum1 = px1[0] + px3[0]; - sum2 = px2[0] + px4[0]; - - // Extension: write to image layers with handle - sycl::ext::oneapi::experimental::write_image_array( - outHandle, int(id[0]), 0, VecType(sum1)); - sycl::ext::oneapi::experimental::write_image_array( - outHandle, int(id[0]), 1, VecType(sum2)); - }); - }); - - q.wait_and_throw(); - - // Extension: copy data from device to host - q.ext_oneapi_copy(outMem.get_handle(), out.data(), desc); - q.wait_and_throw(); - - // Extension: cleanup - sycl::ext::oneapi::experimental::destroy_image_handle(arrayHandle1, dev, ctxt); - sycl::ext::oneapi::experimental::destroy_image_handle(arrayHandle2, dev, ctxt); - sycl::ext::oneapi::experimental::destroy_image_handle(outHandle, dev, ctxt); - -} catch (sycl::exception e) { - std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - std::cout << "Test failed!" << std::endl; - exit(1); -} catch (...) { - std::cerr << "Unknown exception caught!\n"; - std::cout << "Test failed!" << std::endl; - exit(2); -} - -// collect and validate output -bool validated = true; -for (int i = 0; i < N * array_size; i++) { - bool mismatch = false; - if (out[i][0] != expected[i]) { - mismatch = true; - validated = false; - } -} -if (validated) { - return 0; -} +#include -return 1; +include::../../../test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp[lines=9..-1] ``` === Sampling a cubemap ```c++ -#include #include -int main() { - - namespace syclexp = sycl::ext::oneapi::experimental; - - sycl::device dev; - sycl::queue q(dev); - auto ctxt = q.get_context(); - - // declare image data - // width and height must be equal - size_t width = 8; - size_t height = 8; - size_t N = width * height; - std::vector out(N); - std::vector expected(N); - std::vector dataIn1(N * 6); - for (int i = 0; i < width; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < 6; k++) { - dataIn1[i + width * (j + height * k)] = {i + width * (j + height * k), - 0, 0, 0}; - } - } - } - - int j = 0; - for (int i = N - 1; i >= 0; i--) { - expected[j] = (float)i; - j++; - } - - // Extension: image descriptor - Cubemap - syclexp::image_descriptor desc( - {width, height}, 4, - sycl::image_channel_type::fp32, syclexp::image_type::cubemap, 1, 6); - - syclexp::bindless_image_sampler samp( - sycl::addressing_mode::clamp_to_edge, - sycl::coordinate_normalization_mode::normalized, - sycl::filtering_mode::nearest, syclexp::cubemap_filtering_mode::seamless); - - try { - // Extension: allocate memory on device and create the handle - syclexp::image_mem imgMem(desc, dev, ctxt); - - // Extension: create the image and return the handle - syclexp::sampled_image_handle imgHandle = - syclexp::create_image(imgMem, samp, desc, dev, ctxt); - - // Extension: copy over data to device (handler variant) - q.submit([&](sycl::handler &cgh) { - cgh.ext_oneapi_copy(dataIn1.data(), imgMem.get_handle(), desc); - }); - q.wait_and_throw(); - - sycl::buffer buf((float *)out.data(), - sycl::range<2>{height, width}); - q.submit([&](sycl::handler &cgh) { - auto outAcc = buf.get_access( - cgh, sycl::range<2>{height, width}); - - // Emanating vector scans one face - cgh.parallel_for( - sycl::nd_range<2>{{width, height}, {width, height}}, - [=](sycl::nd_item<2> it) { - size_t dim0 = it.get_local_id(0); - size_t dim1 = it.get_local_id(1); - - // Direction Vector - // x -- largest magnitude - // y -- shifted between [-0.99, 0.99] + offset - // z -- shifted between [-0.99, 0.99] + offset - // - // [-0.99, 0.99] -- maintains x as largest magnitude - // - // 4 elems == [-1, -0.5, 0, 0.5] -- need offset to bring uniformity - // +0.25 = [-0.75, -0.25, 0.25, 0.75] - float fdim0 = 1.f; - float fdim1 = (((float(dim0) / (float)width) * 1.98) - 0.99) + - (1.f / (float)width); - float fdim2 = (((float(dim1) / (float)height) * 1.98) - 0.99) + - (1.f / (float)height); - - // Extension: read texture cubemap data from handle - sycl::float4 px = syclexp::sample_cubemap( - imgHandle, sycl::float3(fdim0, fdim1, fdim2)); - - outAcc[sycl::id<2>{dim0, dim1}] = px[0]; - }); - }); - q.wait_and_throw(); - - // Extension: cleanup - syclexp::destroy_image_handle(imgHandle, dev, ctxt); - } catch (sycl::exception e) { - std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - return 1; - } catch (...) { - std::cerr << "Unknown exception caught!\n"; - return 2; - } - - // collect and validate output - bool validated = true; - for (int i = 0; i < N; i++) { - bool mismatch = false; - if (out[i] != expected[i]) { - mismatch = true; - validated = false; - } - if (mismatch) { - std::cout << "Result mismatch! Expected: " << expected[i] - << ", Actual: " << out[i] << std::endl; - } - } - if (validated) { - std::cout << "Test passed!" << std::endl; - return 0; - } - - std::cout << "Test failed!" << std::endl; - return 3; -} +include::../../../test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp[lines=9..-1] ``` === Using imported memory and semaphore objects ```c++ -// Set up device, queue, and context -sycl::device device; -sycl::queue queue(device); -sycl::context context = queue.get_context(); - -size_t width = /* passed from external API */; -size_t height = /* passed from external API */; - -unsigned int num_channels = 1; - /* mapped from external API */ - /* we assume there is one channel */; - -sycl::ext::oneapi::experimental::image_channel_type channel_type = - /* mapped from external API */ - /* we assume sycl::image_channel_type::unsigned_int32 */; - -// Image descriptor - mapped to external API image layout -sycl::ext::oneapi::experimental::image_descriptor desc( - {width, height}, num_channels, channel_type); - -size_t img_size_in_bytes = width * height * sizeof(uint32_t); - -int external_input_image_file_descriptor = /* passed from external API */ -int external_output_image_file_descriptor = /* passed from external API */ - -// Extension: populate external memory descriptors -sycl::ext::oneapi::experimental::external_mem_descriptor< - sycl::ext::oneapi::experimental::resource_fd> - input_ext_mem_desc{ - external_input_image_file_descriptor, - sycl::ext::oneapi::experimental::external_mem_handle_type::opaque_fd, - img_size_in_bytes}; - -sycl::ext::oneapi::experimental::external_mem_descriptor< - sycl::ext::oneapi::experimental::resource_fd> - output_ext_mem_desc{ - external_output_image_file_descriptor, - sycl::ext::oneapi::experimental::external_mem_handle_type::opaque_fd, - img_size_in_bytes}; - -// An external API semaphore will signal this semaphore before our SYCL commands -// can begin execution -int wait_semaphore_file_descriptor = /* passed from external API */; - -// An external API will wait on this semaphore to be signalled by us before it -// can execute some commands -int done_semaphore_file_descriptor = /* passed from external API */; - -// Extension: populate external semaphore descriptor. -// We assume POSIX file descriptor resource types -sycl::ext::oneapi::experimental::external_semaphore_descriptor< - sycl::ext::oneapi::experimental::resource_fd> - wait_external_semaphore_desc{wait_semaphore_file_descriptor, - sycl::ext::oneapi::experimental::external_semaphore_handle_type::opaque_fd}; - -sycl::ext::oneapi::experimental::external_semaphore_descriptor< - sycl::ext::oneapi::experimental::resource_fd> - done_external_semaphore_desc{done_semaphore_file_descriptor, - sycl::ext::oneapi::experimental::external_semaphore_handle_type::opaque_fd}; - -try { - // Extension: import external semaphores - sycl::ext::oneapi::experimental::external_semaphore - wait_external_semaphore = - sycl::ext::oneapi::experimental::import_external_semaphore( - wait_external_semaphore_desc, queue); - - sycl::ext::oneapi::experimental::external_semaphore - done_external_semaphore = - sycl::ext::oneapi::experimental::import_external_semaphore( - done_external_semaphore_desc, queue); - - // Extension: import external memory from descriptors - sycl::ext::oneapi::experimental::external_mem - input_external_mem = - sycl::ext::oneapi::experimental::import_external_memory( - input_ext_mem_desc, queue); - - sycl::ext::oneapi::experimental::external_mem - output_external_mem = - sycl::ext::oneapi::experimental::import_external_memory( - output_ext_mem_desc, queue); - - // Extension: map imported external memory to image memory - sycl::ext::oneapi::experimental::image_mem_handle input_mapped_mem_handle = - sycl::ext::oneapi::experimental::map_external_image_memory( - input_external_mem, desc, queue); - sycl::ext::oneapi::experimental::image_mem_handle output_mapped_mem_handle = - sycl::ext::oneapi::experimental::map_external_image_memory( - output_external_mem, desc, queue); - - // Extension: create images from mapped memory and return the handles - sycl::ext::oneapi::experimental::unsampled_image_handle img_input = - sycl::ext::oneapi::experimental::create_image( - input_mapped_mem_handle, desc, queue); - sycl::ext::oneapi::experimental::unsampled_image_handle img_output = - sycl::ext::oneapi::experimental::create_image( - output_mapped_mem_handle, desc, queue); - - // Extension: wait for imported semaphore - q.ext_oneapi_wait_external_semaphore(wait_external_semaphore) - - // Submit our kernel that depends on imported "wait_semaphore_file_descriptor" - q.submit([&](sycl::handler &cgh) { - cgh.parallel_for<>( - sycl::nd_range<2>{{width, height}, {32, 32}}, - [=](sycl::nd_item<2> it) { - size_t dim0 = it.get_global_id(0); - size_t dim1 = it.get_global_id(1); - - // Extension: read image data from handle to imported image - uint32_t pixel = - sycl::ext::oneapi::experimental::fetch_image( - img_input, sycl::vec(dim0, dim1)); - - // Modify the data before writing back - pixel *= 10; - - // Extension: write image data using handle to imported image - sycl::ext::oneapi::experimental::write_image( - img_output, sycl::vec(dim0, dim1), pixel); - }); - }); - - // Extension: signal imported semaphore - q.ext_oneapi_signal_external_semaphore(done_external_semaphore) - - // The external API can now use the semaphore it exported to - // "done_semaphore_file_descriptor" to schedule its own command submissions - - q.wait_and_throw(); - - // Extension: destroy all external resources - sycl::ext::oneapi::experimental::release_external_memory( - input_external_mem, queue); - sycl::ext::oneapi::experimental::release_external_memory( - output_external_mem, queue); - sycl::ext::oneapi::experimental::release_external_semaphore( - wait_external_semaphore, queue); - sycl::ext::oneapi::experimental::release_external_semaphore( - done_external_semaphore, queue); - sycl::ext::oneapi::experimental::destroy_image_handle(img_input, queue); - sycl::ext::oneapi::experimental::destroy_image_handle(img_output, queue); -} catch (sycl::exception e) { - std::cerr << "SYCL exception caught! : " << e.what() << "\n"; - exit(-1); -} catch (...) { - std::cerr << "Unknown exception caught!\n"; - exit(-1); -} +#include + +include::../../../test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp[lines=8..-1] ``` == Implementation notes @@ -2943,4 +2317,6 @@ These features still need to be handled: equivalent to `clamp`, to match with external APIs. |6.3|2024-10-02| - Add support for `image_mem_handle` to `image_mem_handle` sub-region copies. +|6.4|2024-10-15| - Fix bindless spec examples and include examples in bindless + spec using asciidoc include. |====================== diff --git a/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp b/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp new file mode 100644 index 0000000000000..15ef93a9deffc --- /dev/null +++ b/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp @@ -0,0 +1,74 @@ +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %t.out + +#include +#include + +int main() { + // Set up device, queue, and context + sycl::device dev; + sycl::queue q(dev); + sycl::context ctxt = q.get_context(); + + // Initialize input data + constexpr size_t width = 512; + std::vector dataIn(width); + std::vector dataOut(width); + for (int i = 0; i < width; i++) { + dataIn[i] = static_cast(i); + } + + // Image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor desc( + sycl::range{width}, 1, sycl::image_channel_type::fp32); + + // Extension: returns the device pointer to the allocated memory + sycl::ext::oneapi::experimental::image_mem imgMemoryIn(desc, q); + sycl::ext::oneapi::experimental::image_mem imgMemoryOut(desc, q); + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgIn = + sycl::ext::oneapi::experimental::create_image(imgMemoryIn, desc, q); + sycl::ext::oneapi::experimental::unsampled_image_handle imgOut = + sycl::ext::oneapi::experimental::create_image(imgMemoryOut, desc, q); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn.data(), imgMemoryIn.get_handle(), desc); + + // Bindless images require manual synchronization + // Wait for copy operation to finish + q.wait_and_throw(); + + q.submit([&](sycl::handler &cgh) { + // No need to request access, handles captured by value + + cgh.parallel_for(width, [=](sycl::id<1> id) { + // Extension: read image data from handle + float pixel = sycl::ext::oneapi::experimental::fetch_image( + imgIn, int(id[0])); + + // Extension: write to image data using handle + sycl::ext::oneapi::experimental::write_image(imgOut, int(id[0]), pixel); + }); + }); + + // Using image handles requires manual synchronization + q.wait_and_throw(); + + // Copy data written to imgOut to host + q.ext_oneapi_copy(imgMemoryOut.get_handle(), dataOut.data(), desc); + + // Cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgIn, q); + sycl::ext::oneapi::experimental::destroy_image_handle(imgOut, q); + + // Validate that `dataIn` correctly transferred to `dataOut` + for (size_t i = 0; i < width; i++) { + if (dataOut[i] != dataIn[i]) { + return 1; + } + } + return 0; +} diff --git a/sycl/test-e2e/bindless_images/examples/example_2_2D_dynamic_read.cpp b/sycl/test-e2e/bindless_images/examples/example_2_2D_dynamic_read.cpp new file mode 100644 index 0000000000000..035c5314d4585 --- /dev/null +++ b/sycl/test-e2e/bindless_images/examples/example_2_2D_dynamic_read.cpp @@ -0,0 +1,103 @@ +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %t.out + +#include +#include + +int main() { + // Set up device, queue, and context + sycl::device dev; + sycl::queue q(dev); + sycl::context ctxt = q.get_context(); + + // declare image data + size_t numImages = 5; + size_t width = 8; + size_t height = 8; + size_t numPixels = width * height; + std::vector dataIn(numPixels); + std::vector dataOut(numPixels); + std::vector dataExpected(numPixels); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + int index = j + (height * i); + dataIn[index] = index; + dataExpected[index] = index * numImages; + } + } + + // Image descriptor - can use the same for all images + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, 1, sycl::image_channel_type::fp32); + + // Allocate each image and save the handles + std::vector imgAllocations; + for (int i = 0; i < numImages; i++) { + // Extension: move-construct device allocated memory + imgAllocations.emplace_back( + sycl::ext::oneapi::experimental::image_mem{desc, q}); + } + + // Copy over data to device for each image + for (int i = 0; i < numImages; i++) { + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn.data(), imgAllocations[i].get_handle(), desc); + } + + // Wait for copy operations to finish + q.wait_and_throw(); + + // Create the images and return the handles + std::vector + imgHandles; + for (int i = 0; i < numImages; i++) { + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle = + sycl::ext::oneapi::experimental::create_image(imgAllocations[i], desc, + q); + imgHandles.push_back(imgHandle); + } + + { + sycl::buffer outBuf{dataOut.data(), sycl::range{height, width}}; + sycl::buffer imgHandlesBuf{imgHandles.data(), sycl::range{numImages}}; + q.submit([&](sycl::handler &cgh) { + sycl::accessor outAcc{outBuf, cgh, sycl::write_only}; + sycl::accessor imgHandleAcc{imgHandlesBuf, cgh, sycl::read_only}; + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Sum each image by reading via its handle + float sum = 0; + for (int i = 0; i < numImages; i++) { + // Extension: read image data from handle + sum += (sycl::ext::oneapi::experimental::fetch_image( + imgHandleAcc[i], sycl::vec(dim0, dim1))); + } + outAcc[sycl::id{dim1, dim0}] = sum; + }); + }); + } + + // Using image handles requires manual synchronization + q.wait_and_throw(); + + // Cleanup + for (int i = 0; i < numImages; i++) { + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandles[i], q); + } + + // Validate that `dataOut` is correct + for (size_t i = 0; i < numPixels; i++) { + if (dataOut[i] != dataExpected[i]) { + return 1; + } + } + return 0; +} diff --git a/sycl/test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp b/sycl/test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp new file mode 100644 index 0000000000000..2e5d55eb32398 --- /dev/null +++ b/sycl/test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp @@ -0,0 +1,98 @@ +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %t.out + +#include +#include + +int main() { + // Set up device, queue, and context + sycl::device dev; + sycl::queue q(dev); + sycl::context ctxt = q.get_context(); + + // declare image data + constexpr size_t width = 16; + unsigned int num_levels = 2; + std::vector dataIn1(width); + std::vector dataIn2(width / 2); + std::vector dataOut(width); + std::vector dataExpected(width); + int j = 0; + for (int i = 0; i < width; i++) { + dataExpected[i] = static_cast(i + (j + 10)); + if (i % 2) + j++; + dataIn1[i] = static_cast(i); + if (i < (width / 2)) + dataIn2[i] = static_cast(i + 10); + } + + // Image descriptor -- number of levels + sycl::ext::oneapi::experimental::image_descriptor desc( + {width}, 1, sycl::image_channel_type::fp32, + sycl::ext::oneapi::experimental::image_type::mipmap, num_levels); + + // Allocate the mipmap + sycl::ext::oneapi::experimental::image_mem mip_mem(desc, q); + + // Retrieve level 0 + sycl::ext::oneapi::experimental::image_mem_handle img_mem1 = + mip_mem.get_mip_level_mem_handle(0); + + // Copy over data to level 0 + q.ext_oneapi_copy(dataIn1.data(), img_mem1, desc); + + // Copy over data to level 1 + q.ext_oneapi_copy(dataIn2.data(), mip_mem.get_mip_level_mem_handle(1), + desc.get_mip_level_desc(1)); + q.wait_and_throw(); + + // Extended sampler object to take in mipmap attributes + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::mirrored_repeat, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::nearest, sycl::filtering_mode::nearest, 0.0f, + static_cast(num_levels), 8.0f); + + // Create a sampled image handle to represent the mipmap + sycl::ext::oneapi::experimental::sampled_image_handle mipHandle = + sycl::ext::oneapi::experimental::create_image(mip_mem, samp, desc, q); + q.wait_and_throw(); + + { + sycl::buffer buf((float *)dataOut.data(), width); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access(cgh, width); + + cgh.parallel_for(width, [=](sycl::id<1> id) { + float sum = 0; + float x = + (static_cast(id[0]) + 0.5f) / static_cast(width); + // Read mipmap level 0 with anisotropic filtering + // and level 1 with level filtering + float px1 = sycl::ext::oneapi::experimental::sample_mipmap( + mipHandle, x, 0.0f, 0.0f); + float px2 = sycl::ext::oneapi::experimental::sample_mipmap( + mipHandle, x, 1.0f); + + sum = px1 + px2; + outAcc[id] = sum; + }); + }); + } + + q.wait_and_throw(); + + // Cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(mipHandle, q); + + // Validate that `dataOut` is correct + for (size_t i = 0; i < width; i++) { + if (dataOut[i] != dataExpected[i]) { + return 1; + } + } + return 0; +} diff --git a/sycl/test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp b/sycl/test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp new file mode 100644 index 0000000000000..511c5a66e1828 --- /dev/null +++ b/sycl/test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp @@ -0,0 +1,111 @@ +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %t.out + +#include +#include + +using VecType = sycl::vec; + +int main() { + // Set up device, queue, and context + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + constexpr size_t width = 5; + constexpr size_t array_size = 2; + constexpr size_t N = width; + std::vector out(N * array_size); + std::vector expected(N * array_size); + std::vector outBuf(N); + std::vector dataIn1(N * array_size); + std::vector dataIn2(N * array_size); + + for (int i = 0; i < N * array_size; i++) { + // Populate input data (to-be image arrays) + dataIn1[i] = VecType(i); + dataIn2[i] = VecType(2 * i); + } + + // Populate expected output + for (int i = 0; i < width; i++) { + for (int l = 0; l < array_size; l++) { + expected[l * N + i] = dataIn1[l * N + i][0] + dataIn2[l * N + i][0]; + } + } + + // Extension: image descriptor -- number of layers + sycl::ext::oneapi::experimental::image_descriptor desc( + {width}, 4, sycl::image_channel_type::fp32, + sycl::ext::oneapi::experimental::image_type::array, 1, array_size); + + // Extension: allocate image array memory on device + sycl::ext::oneapi::experimental::image_mem arrayMem1(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem arrayMem2(desc, dev, ctxt); + sycl::ext::oneapi::experimental::image_mem outMem(desc, dev, ctxt); + + // Extension: copy over data to device + q.ext_oneapi_copy(dataIn1.data(), arrayMem1.get_handle(), desc); + q.ext_oneapi_copy(dataIn2.data(), arrayMem2.get_handle(), desc); + q.wait_and_throw(); + + // Extension: create a unsampled image handles to represent the image arrays + sycl::ext::oneapi::experimental::unsampled_image_handle arrayHandle1 = + sycl::ext::oneapi::experimental::create_image(arrayMem1, desc, dev, ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle arrayHandle2 = + sycl::ext::oneapi::experimental::create_image(arrayMem2, desc, dev, ctxt); + sycl::ext::oneapi::experimental::unsampled_image_handle outHandle = + sycl::ext::oneapi::experimental::create_image(outMem, desc, dev, ctxt); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for(N, [=](sycl::id<1> id) { + float sum1 = 0; + float sum2 = 0; + + // Extension: read image layers 0 and 1 + VecType px1 = sycl::ext::oneapi::experimental::fetch_image_array( + arrayHandle1, int(id[0]), 0); + VecType px2 = sycl::ext::oneapi::experimental::fetch_image_array( + arrayHandle1, int(id[0]), 1); + + // Extension: read image layers 0 and 1 + VecType px3 = sycl::ext::oneapi::experimental::fetch_image_array( + arrayHandle2, int(id[0]), 0); + VecType px4 = sycl::ext::oneapi::experimental::fetch_image_array( + arrayHandle2, int(id[0]), 1); + + sum1 = px1[0] + px3[0]; + sum2 = px2[0] + px4[0]; + + // Extension: write to image layers with handle + sycl::ext::oneapi::experimental::write_image_array( + outHandle, int(id[0]), 0, VecType(sum1)); + sycl::ext::oneapi::experimental::write_image_array( + outHandle, int(id[0]), 1, VecType(sum2)); + }); + }); + + q.wait_and_throw(); + + // Extension: copy data from device to host + q.ext_oneapi_copy(outMem.get_handle(), out.data(), desc); + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(arrayHandle1, dev, + ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(arrayHandle2, dev, + ctxt); + sycl::ext::oneapi::experimental::destroy_image_handle(outHandle, dev, ctxt); + + // Collect and validate output + for (size_t i = 0; i < N * array_size; i++) { + if (out[i][0] != expected[i]) { + return 1; + } + } + return 0; +} diff --git a/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp b/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp new file mode 100644 index 0000000000000..a332802fc627d --- /dev/null +++ b/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp @@ -0,0 +1,112 @@ +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %t.out + +#include +#include + +int main() { + namespace syclexp = sycl::ext::oneapi::experimental; + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + // width and height must be equal + size_t width = 8; + size_t height = 8; + size_t N = width * height; + std::vector out(N); + std::vector expected(N); + std::vector dataIn1(N * 6); + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < 6; k++) { + dataIn1[i + width * (j + height * k)] = {i + width * (j + height * k), + 0, 0, 0}; + } + } + } + + int j = 0; + for (int i = N - 1; i >= 0; i--) { + expected[j] = static_cast(i); + j++; + } + + // Extension: image descriptor - Cubemap + syclexp::image_descriptor desc({width, height}, 4, + sycl::image_channel_type::fp32, + syclexp::image_type::cubemap, 1, 6); + + syclexp::bindless_image_sampler samp( + sycl::addressing_mode::clamp_to_edge, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::nearest, syclexp::cubemap_filtering_mode::seamless); + + // Extension: allocate memory on device and create the handle + syclexp::image_mem imgMem(desc, dev, ctxt); + + // Extension: create the image and return the handle + syclexp::sampled_image_handle imgHandle = + syclexp::create_image(imgMem, samp, desc, dev, ctxt); + + // Extension: copy over data to device (handler variant) + q.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_copy(dataIn1.data(), imgMem.get_handle(), desc); + }); + q.wait_and_throw(); + + { + sycl::buffer buf((float *)out.data(), + sycl::range<2>{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + // Emanating vector scans one face + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Direction Vector + // x -- largest magnitude + // y -- shifted between [-0.99, 0.99] + offset + // z -- shifted between [-0.99, 0.99] + offset + // + // [-0.99, 0.99] -- maintains x as largest magnitude + // + // 4 elems == [-1, -0.5, 0, 0.5] -- need offset to bring uniformity + // +0.25 = [-0.75, -0.25, 0.25, 0.75] + float fdim0 = 1.f; + float fdim1 = (((float(dim0) / (float)width) * 1.98) - 0.99) + + (1.f / (float)width); + float fdim2 = (((float(dim1) / (float)height) * 1.98) - 0.99) + + (1.f / (float)height); + + // Extension: read texture cubemap data from handle + sycl::float4 px = syclexp::sample_cubemap( + imgHandle, sycl::float3(fdim0, fdim1, fdim2)); + + outAcc[sycl::id<2>{dim0, dim1}] = px[0]; + }); + }); + } + + q.wait_and_throw(); + + // Extension: cleanup + syclexp::destroy_image_handle(imgHandle, dev, ctxt); + + // Collect and validate output + for (size_t i = 0; i < N; i++) { + if (out[i] != expected[i]) { + return 1; + } + } + return 0; +} diff --git a/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp b/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp new file mode 100644 index 0000000000000..d555b73d49aac --- /dev/null +++ b/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp @@ -0,0 +1,155 @@ +// REQUIRES: cuda + +// RUN: %{build} -o %t.out + +#include +#include + +int main() { + // Set up device, queue, and context + sycl::device dev; + sycl::queue q(dev); + sycl::context ctxt = q.get_context(); + + size_t width = 123 /* passed from external API */; + size_t height = 123 /* passed from external API */; + + /* mapped from external API */ + unsigned int num_channels = 1; + /* we assume there is one channel */; + + sycl::image_channel_type channel_type = + /* mapped from external API */ + /* we assume */ sycl::image_channel_type::unsigned_int32; + + // Image descriptor - mapped to external API image layout + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, num_channels, channel_type); + + size_t img_size_in_bytes = width * height * sizeof(uint32_t); + + int external_input_image_file_descriptor = 123 /* passed from external API */; + int external_output_image_file_descriptor = + 123 /* passed from external API */; + + // Extension: populate external memory descriptors + sycl::ext::oneapi::experimental::external_mem_descriptor< + sycl::ext::oneapi::experimental::resource_fd> + input_ext_mem_desc{ + external_input_image_file_descriptor, + sycl::ext::oneapi::experimental::external_mem_handle_type::opaque_fd, + img_size_in_bytes}; + + sycl::ext::oneapi::experimental::external_mem_descriptor< + sycl::ext::oneapi::experimental::resource_fd> + output_ext_mem_desc{ + external_output_image_file_descriptor, + sycl::ext::oneapi::experimental::external_mem_handle_type::opaque_fd, + img_size_in_bytes}; + + // An external API semaphore will signal this semaphore before our SYCL + // commands can begin execution + int wait_semaphore_file_descriptor = 123 /* passed from external API */; + + // An external API will wait on this semaphore to be signalled by us before it + // can execute some commands + int done_semaphore_file_descriptor = 123 /* passed from external API */; + + // Extension: populate external semaphore descriptor. + // We assume POSIX file descriptor resource types + sycl::ext::oneapi::experimental::external_semaphore_descriptor< + sycl::ext::oneapi::experimental::resource_fd> + wait_external_semaphore_desc{ + wait_semaphore_file_descriptor, + sycl::ext::oneapi::experimental::external_semaphore_handle_type:: + opaque_fd}; + + sycl::ext::oneapi::experimental::external_semaphore_descriptor< + sycl::ext::oneapi::experimental::resource_fd> + done_external_semaphore_desc{ + done_semaphore_file_descriptor, + sycl::ext::oneapi::experimental::external_semaphore_handle_type:: + opaque_fd}; + + // Extension: import external semaphores + sycl::ext::oneapi::experimental::external_semaphore wait_external_semaphore = + sycl::ext::oneapi::experimental::import_external_semaphore( + wait_external_semaphore_desc, q); + + sycl::ext::oneapi::experimental::external_semaphore done_external_semaphore = + sycl::ext::oneapi::experimental::import_external_semaphore( + done_external_semaphore_desc, q); + + // Extension: import external memory from descriptors + sycl::ext::oneapi::experimental::external_mem input_external_mem = + sycl::ext::oneapi::experimental::import_external_memory( + input_ext_mem_desc, q); + + sycl::ext::oneapi::experimental::external_mem output_external_mem = + sycl::ext::oneapi::experimental::import_external_memory( + output_ext_mem_desc, q); + + // Extension: map imported external memory to image memory + sycl::ext::oneapi::experimental::image_mem_handle input_mapped_mem_handle = + sycl::ext::oneapi::experimental::map_external_image_memory( + input_external_mem, desc, q); + sycl::ext::oneapi::experimental::image_mem_handle output_mapped_mem_handle = + sycl::ext::oneapi::experimental::map_external_image_memory( + output_external_mem, desc, q); + + // Extension: create images from mapped memory and return the handles + sycl::ext::oneapi::experimental::unsampled_image_handle img_input = + sycl::ext::oneapi::experimental::create_image(input_mapped_mem_handle, + desc, q); + sycl::ext::oneapi::experimental::unsampled_image_handle img_output = + sycl::ext::oneapi::experimental::create_image(output_mapped_mem_handle, + desc, q); + + // Extension: wait for imported semaphore + q.ext_oneapi_wait_external_semaphore(wait_external_semaphore); + + // Submit our kernel that depends on imported + // "wait_semaphore_file_descriptor" + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for<>( + sycl::nd_range<2>{{width, height}, {32, 32}}, [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_global_id(0); + size_t dim1 = it.get_global_id(1); + + // Extension: read image data from handle to imported image + uint32_t pixel = + sycl::ext::oneapi::experimental::fetch_image( + img_input, sycl::vec(dim0, dim1)); + + // Modify the data before writing back + pixel *= 10; + + // Extension: write image data using handle to imported image + sycl::ext::oneapi::experimental::write_image( + img_output, sycl::vec(dim0, dim1), pixel); + }); + }); + + // Extension: signal imported semaphore + q.ext_oneapi_signal_external_semaphore(done_external_semaphore); + + // The external API can now use the semaphore it exported to + // "done_semaphore_file_descriptor" to schedule its own command + // submissions + + q.wait_and_throw(); + + // Extension: destroy all external resources + sycl::ext::oneapi::experimental::release_external_memory(input_external_mem, + q); + sycl::ext::oneapi::experimental::release_external_memory(output_external_mem, + q); + sycl::ext::oneapi::experimental::release_external_semaphore( + wait_external_semaphore, q); + sycl::ext::oneapi::experimental::release_external_semaphore( + done_external_semaphore, q); + sycl::ext::oneapi::experimental::destroy_image_handle(img_input, q); + sycl::ext::oneapi::experimental::destroy_image_handle(img_output, q); + + return 0; +}