From ce56217185f4deeb940a2b6debdc76672bba2056 Mon Sep 17 00:00:00 2001 From: "Yadav, Rishi" Date: Mon, 20 Oct 2025 10:27:50 +0000 Subject: [PATCH] Changes for new cute apis prefetch transpose vnni --- test/unit/cute/intel_xe/CMakeLists.txt | 3 + .../cute/intel_xe/xe_copy_prefetch_2d.cpp | 163 ++++++++++++++++++ test/unit/cute/intel_xe/xe_transpose_2d.cpp | 100 +++++++++++ test/unit/cute/intel_xe/xe_vnni_2d.cpp | 70 ++++++++ 4 files changed, 336 insertions(+) create mode 100644 test/unit/cute/intel_xe/xe_copy_prefetch_2d.cpp create mode 100644 test/unit/cute/intel_xe/xe_transpose_2d.cpp create mode 100644 test/unit/cute/intel_xe/xe_vnni_2d.cpp diff --git a/test/unit/cute/intel_xe/CMakeLists.txt b/test/unit/cute/intel_xe/CMakeLists.txt index cdd23be94c..4473388a62 100755 --- a/test/unit/cute/intel_xe/CMakeLists.txt +++ b/test/unit/cute/intel_xe/CMakeLists.txt @@ -47,6 +47,9 @@ cutlass_test_unit_add_executable( copy_scatter.cpp mma.cpp tiled_mma.cpp + xe_copy_prefetch_2d.cpp + xe_vnni_2d.cpp + xe_transpose_2d.cpp ) else() cutlass_test_unit_add_executable( diff --git a/test/unit/cute/intel_xe/xe_copy_prefetch_2d.cpp b/test/unit/cute/intel_xe/xe_copy_prefetch_2d.cpp new file mode 100644 index 0000000000..4ffcc03e47 --- /dev/null +++ b/test/unit/cute/intel_xe/xe_copy_prefetch_2d.cpp @@ -0,0 +1,163 @@ +/*************************************************************************************************** + * Copyright (C) 2025 Intel Corporation, All rights reserved. + * SPDX-License-Identifier: BSD-3-Clause + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +#include "cutlass/detail/layout.hpp" + +#include +#include +#include +#include +#include +#include + +#include "cutlass_unit_test.h" +#include "utils.hpp" + +using namespace cute; +using namespace cutlass; +using namespace compat::experimental; + +#define SUBGROUP_SIZE (16) + +#if (IGC_VERSION_MAJOR > 2) || (IGC_VERSION_MAJOR == 2 && IGC_VERSION_MINOR >= 18) + +// Kernel name for unique identification +template +class XEPrefetch2DKernelName; + +// Device kernel for XE_PREFETCH_2D testing +template +void xe_prefetch_2d_kernel(SrcTensor src) { + using namespace cute; + using Element = typename SrcTensor::value_type; + + // Only execute with the first subgroup to avoid race conditions + if (sycl::ext::oneapi::this_work_item::get_nd_item<1>().get_group(0) == 0) { + // Get thread/subgroup information + auto local_id = int(sycl::ext::oneapi::this_work_item::get_nd_item<1>().get_local_id(0)); + + // Create block 2D prefetch inside kernel (device-only operation) + using PrefetchOp = XE_PREFETCH_2D; + auto tiled_prefetch = make_block_2d_copy(PrefetchOp{}, src); + + // Get thread slice of the tiled prefetch + auto thr_prefetch = tiled_prefetch.get_slice(local_id); + + // Create coordinate tensor for a single tile + auto coord_shape = make_shape(Int{}, Int>{}); + Tensor coord_tile = make_identity_tensor(coord_shape); + + // Partition source coordinates for prefetch + auto thr_src_coord = thr_prefetch.partition_S(coord_tile); + + // Create dummy destination fragment (prefetch ignores destination) + auto thr_dst_frag = thr_prefetch.partition_fragment_D(coord_tile); + + // Perform the prefetch operation + copy(tiled_prefetch, thr_src_coord, thr_dst_frag); + + // Synchronize to ensure all threads complete their operations + sycl::group_barrier(sycl::ext::oneapi::this_work_item::get_nd_item<1>().get_group()); + } +} + +// Host test function template for XE_PREFETCH_2D +template +void test_xe_prefetch_2d() { + using namespace cute; + + // Matrix dimensions - must be compatible with block 2D constraints + constexpr int M = Height; + constexpr int N = Width * sizeof_bits_v / Bits; + + // Ensure proper alignment (required for block 2D operations) + constexpr int elem_alignment = 16 / sizeof(Element); + constexpr int aligned_N = ((N + elem_alignment - 1) / elem_alignment) * elem_alignment; + + // Allocate and initialize host data + cutlass::host_vector host_src(M * aligned_N); + + // Initialize source with test pattern + for (size_t i = 0; i < host_src.size(); ++i) { + host_src[i] = static_cast(i % 256); + } + + // Copy to device + cutlass::device_vector device_src = host_src; + + // Create tensors with proper layout + Tensor tensor_src = + make_tensor(make_gmem_ptr(device_src.data()), + make_layout(Shape, Int>{}, Stride, _1>{})); + + // Launch kernel - prefetch happens on device + auto blockDim = compat::dim3(SUBGROUP_SIZE); + auto gridDim = compat::dim3(1); + + launch, + XEPrefetch2DKernelName>( + launch_policy{ + gridDim, blockDim, + kernel_properties{sycl_exp::sub_group_size} + }, + tensor_src); + + compat::wait_and_throw(); + + // Note: XE_PREFETCH_2D just prefetches to cache, no verification needed + EXPECT_TRUE(true) << "XE_PREFETCH_2D operation completed successfully"; +} + +TEST(PVC_CuTe_Xe, XE_PREFETCH_2D_uint8) { + test_xe_prefetch_2d(); + test_xe_prefetch_2d(); +} + +TEST(PVC_CuTe_Xe, XE_PREFETCH_2D_int16) { + test_xe_prefetch_2d(); + test_xe_prefetch_2d(); +} + +TEST(PVC_CuTe_Xe, XE_PREFETCH_2D_float) { + test_xe_prefetch_2d(); + test_xe_prefetch_2d(); +} + +#else + +// For the fallback case +#include "cutlass_unit_test.h" + +TEST(PVC_CuTe_Xe, XE_PREFETCH_2D_SKIPPED) { + GTEST_SKIP() << "XE_PREFETCH_2D tests require IGC version 2.18 or higher. skipped"; +} + +#endif \ No newline at end of file diff --git a/test/unit/cute/intel_xe/xe_transpose_2d.cpp b/test/unit/cute/intel_xe/xe_transpose_2d.cpp new file mode 100644 index 0000000000..b30bcd536e --- /dev/null +++ b/test/unit/cute/intel_xe/xe_transpose_2d.cpp @@ -0,0 +1,100 @@ +/*************************************************************************************************** + * Copyright (C) 2025 Intel Corporation, All rights reserved. + * SPDX-License-Identifier: BSD-3-Clause + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +#include +#include +#include +#include +#include +#include "cutlass_unit_test.h" + +using namespace cute; + +#if (IGC_VERSION_MAJOR > 2) || (IGC_VERSION_MAJOR == 2 && IGC_VERSION_MINOR >= 18) + +TEST(PVC_CuTe_Xe, XE_LOAD_2D_TRANSPOSE_API_Declaration) { + // Template: XE_LOAD_2D_TRANSPOSE + // Constraints: Bits == 32 || Bits == 64, Width <= 8 + // For 64-bit: Height == 8 && Width < 4 + + // Test 32-bit transpose operations + using TransposeOp_32bit_2x4 = XE_LOAD_2D_TRANSPOSE<32, 2, 4>; + using TransposeOp_32bit_4x8 = XE_LOAD_2D_TRANSPOSE<32, 4, 8>; + using TransposeOp_32bit_8x2 = XE_LOAD_2D_TRANSPOSE<32, 8, 2>; + + // Test 64-bit transpose operations (limited constraints) + using TransposeOp_64bit_8x2 = XE_LOAD_2D_TRANSPOSE<64, 8, 2>; + using TransposeOp_64bit_8x3 = XE_LOAD_2D_TRANSPOSE<64, 8, 3>; + + // Test that the operations have the required static members from XE_Copy_Op_2D_Base + static_assert(TransposeOp_32bit_2x4::AtomHeight == 2); + static_assert(TransposeOp_32bit_2x4::AtomWidth == 4); + static_assert(TransposeOp_32bit_2x4::CopyBits == 32); + + static_assert(TransposeOp_32bit_4x8::AtomHeight == 4); + static_assert(TransposeOp_32bit_4x8::AtomWidth == 8); + static_assert(TransposeOp_32bit_4x8::CopyBits == 32); + + static_assert(TransposeOp_64bit_8x2::AtomHeight == 8); + static_assert(TransposeOp_64bit_8x2::AtomWidth == 2); + static_assert(TransposeOp_64bit_8x2::CopyBits == 64); + + EXPECT_TRUE(true) << "XE_LOAD_2D_TRANSPOSE API types declared successfully"; +} + +TEST(PVC_CuTe_Xe, XE_LOAD_2D_TRANSPOSE_Constraints) { + // Test that the compile-time constraints are enforced + + // Valid 32-bit operations + using Valid32_1 = XE_LOAD_2D_TRANSPOSE<32, 1, 1>; + using Valid32_2 = XE_LOAD_2D_TRANSPOSE<32, 16, 8>; // Width <= 8 + + // Valid 64-bit operations (Height == 8 && Width < 4) + using Valid64_1 = XE_LOAD_2D_TRANSPOSE<64, 8, 1>; + using Valid64_2 = XE_LOAD_2D_TRANSPOSE<64, 8, 2>; + using Valid64_3 = XE_LOAD_2D_TRANSPOSE<64, 8, 3>; + + static_assert(Valid32_1::CopyBits == 32); + static_assert(Valid32_2::CopyBits == 32); + static_assert(Valid64_1::CopyBits == 64); + static_assert(Valid64_2::CopyBits == 64); + static_assert(Valid64_3::CopyBits == 64); + + EXPECT_TRUE(true) << "XE_LOAD_2D_TRANSPOSE constraint validation successful"; +} + +#else + +TEST(PVC_CuTe_Xe, XE_LOAD_2D_TRANSPOSE_SKIPPED) { + GTEST_SKIP() << "XE_LOAD_2D_TRANSPOSE tests require IGC version 2.18 or higher. skipped"; +} + +#endif \ No newline at end of file diff --git a/test/unit/cute/intel_xe/xe_vnni_2d.cpp b/test/unit/cute/intel_xe/xe_vnni_2d.cpp new file mode 100644 index 0000000000..ebc50d9af9 --- /dev/null +++ b/test/unit/cute/intel_xe/xe_vnni_2d.cpp @@ -0,0 +1,70 @@ +/*************************************************************************************************** + * Copyright (C) 2025 Intel Corporation, All rights reserved. + * SPDX-License-Identifier: BSD-3-Clause + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +#include +#include +#include +#include +#include +#include "cutlass_unit_test.h" + +using namespace cute; + +#if (IGC_VERSION_MAJOR > 2) || (IGC_VERSION_MAJOR == 2 && IGC_VERSION_MINOR >= 18) + +TEST(PVC_CuTe_Xe, XE_LOAD_2D_VNNI_API_Declaration) { + // Template: XE_LOAD_2D_VNNI + + // Test that the VNNI operation types can be declared + using VNNIOp_8bit_2x32 = XE_LOAD_2D_VNNI<8, 2, 32>; + using VNNIOp_8bit_4x32 = XE_LOAD_2D_VNNI<8, 4, 32>; + using VNNIOp_16bit_2x16 = XE_LOAD_2D_VNNI<16, 2, 16>; + using VNNIOp_16bit_4x16 = XE_LOAD_2D_VNNI<16, 4, 16>; + + // Test that the operations have the required static members from XE_Copy_Op_2D_Base + static_assert(VNNIOp_8bit_2x32::AtomHeight == 2); + static_assert(VNNIOp_8bit_2x32::AtomWidth == 32); + static_assert(VNNIOp_8bit_2x32::CopyBits == 8); + + static_assert(VNNIOp_16bit_2x16::AtomHeight == 2); + static_assert(VNNIOp_16bit_2x16::AtomWidth == 16); + static_assert(VNNIOp_16bit_2x16::CopyBits == 16); + + EXPECT_TRUE(true) << "XE_LOAD_2D_VNNI API types declared successfully"; +} + +#else + +TEST(PVC_CuTe_Xe, XE_LOAD_2D_VNNI_SKIPPED) { + GTEST_SKIP() << "XE_LOAD_2D_VNNI tests require IGC version 2.18 or higher. skipped"; +} + +#endif \ No newline at end of file