diff --git a/CMakeLists.txt b/CMakeLists.txt index 7104450..df317d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,8 +7,14 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_FLAGS "-O3 -march=native") option(ENABLE_SANITIZERS "Enable Clang sanitizers" OFF) -option(ENABLE_ROCSPARSE "Enable rocSPARSE" OFF) -option(ENABLE_CUSPARSE "Enable cuSPARSE" OFF) +option(ENABLE_SYCL_REFERENCE "Enable SYCL kernels in reference backend" OFF) + +# Vendor backends +option(ENABLE_ONEMKL_SYCL "Enable oneMKL (SYCL) vendor backend" OFF) +option(ENABLE_ARMPL "Enable ArmPL vendor backend" OFF) +option(ENABLE_ROCSPARSE "Enable rocSPARSE vendor backend" OFF) +option(ENABLE_CUSPARSE "Enable cuSPARSE vendor backend" OFF) +option(ENABLE_AOCLSPARSE "Enable AOCL-Sparse vendor backend" OFF) # Get includes, which declares the `spblas` library add_subdirectory(include) @@ -20,9 +26,43 @@ endif() # Download dependencies include(FetchContent) +# Enable sanitizers +if (ENABLE_SANITIZERS) + set(SANITIZER_FLAGS "-fsanitize=address,undefined") + target_compile_options(spblas INTERFACE ${SANITIZER_FLAGS} -g -O1 -fno-omit-frame-pointer) + target_link_options(spblas INTERFACE ${SANITIZER_FLAGS}) +endif() + +# Initialize backend flags set(SPBLAS_CPU_BACKEND OFF) set(SPBLAS_GPU_BACKEND OFF) +if (ENABLE_SYCL_REFERENCE) + if (ENABLE_ONEMKL_SYCL OR ENABLE_ARMPL OR ENABLE_ROCSPARSE OR ENABLE_CUSPARSE OR ENABLE_AOCLSPARSE) + message(FATAL_ERROR "SYCL reference backend cannot be enabled together with vendor backends") + endif() + + # Check for SYCL support + include(CheckCXXCompilerFlag) + check_cxx_compiler_flag("-fsycl" COMPILER_SUPPORTS_SYCL) + + FetchContent_Declare( + sycl_thrust + GIT_REPOSITORY https://github.com/SparseBLAS/sycl-thrust.git + GIT_TAG main) + FetchContent_MakeAvailable(sycl_thrust) + + if(COMPILER_SUPPORTS_SYCL) + target_compile_options(spblas INTERFACE -fsycl -fsycl-device-code-split=per_kernel) + # target_compile_options(spblas INTERFACE -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=sm_90) + + else() + message(FATAL_ERROR "Compiler does not support SYCL (-fsycl flag not available)") + endif() + + target_compile_definitions(spblas INTERFACE SPBLAS_ENABLE_SYCL_REFERENCE) +endif() + if (ENABLE_ONEMKL_SYCL) set(SPBLAS_CPU_BACKEND ON) set(SPBLAS_GPU_BACKEND ON) @@ -47,6 +87,23 @@ if (ENABLE_ARMPL) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSPBLAS_ENABLE_ARMPL") endif() +if (ENABLE_ROCSPARSE) + set(SPBLAS_GPU_BACKEND ON) + project(spblas LANGUAGES HIP) + find_package(hip REQUIRED) + find_package(rocsparse REQUIRED) + target_link_libraries(spblas INTERFACE roc::rocsparse hip::host) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSPBLAS_ENABLE_ROCSPARSE") + set(CMAKE_HIP_FLAGS "${CMAKE_CXX_FLAGS}") +endif() + +if (ENABLE_CUSPARSE) + set(SPBLAS_GPU_BACKEND ON) + find_package(CUDAToolkit REQUIRED) + target_link_libraries(spblas INTERFACE CUDA::cudart CUDA::cusparse CUDA::cublas) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSPBLAS_ENABLE_CUSPARSE") +endif() + if (ENABLE_AOCLSPARSE) set(SPBLAS_CPU_BACKEND ON) if (NOT DEFINED ENV{AOCLSPARSE_DIR}) @@ -77,23 +134,6 @@ if (ENABLE_AOCLSPARSE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSPBLAS_ENABLE_AOCLSPARSE") endif() -if (ENABLE_ROCSPARSE) - set(SPBLAS_GPU_BACKEND ON) - project(spblas LANGUAGES HIP) - find_package(hip REQUIRED) - find_package(rocsparse REQUIRED) - target_link_libraries(spblas INTERFACE roc::rocsparse hip::host) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSPBLAS_ENABLE_ROCSPARSE") - set(CMAKE_HIP_FLAGS "${CMAKE_CXX_FLAGS}") -endif() - -if (ENABLE_CUSPARSE) - set(SPBLAS_GPU_BACKEND ON) - find_package(CUDAToolkit REQUIRED) - target_link_libraries(spblas INTERFACE CUDA::cudart CUDA::cusparse CUDA::cublas) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSPBLAS_ENABLE_CUSPARSE") -endif() - # If no vendor backend is enabled, enable CPU backend for reference implementation if (NOT ENABLE_ONEMKL_SYCL AND NOT ENABLE_ARMPL AND @@ -108,13 +148,6 @@ if (LOG_LEVEL) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLOG_LEVEL=${LOG_LEVEL}") # SPBLAS_DEBUG | SPBLAS_WARNING | SPBLAS_TRACE | SPBLAS_INFO endif() -# Enable sanitizers -if (ENABLE_SANITIZERS) - set(SANITIZER_FLAGS "-fsanitize=address,undefined") - target_compile_options(spblas INTERFACE ${SANITIZER_FLAGS} -g -O1 -fno-omit-frame-pointer) - target_link_options(spblas INTERFACE ${SANITIZER_FLAGS}) -endif() - # mdspan FetchContent_Declare( mdspan diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index fcf3a82..426df0d 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -23,3 +23,7 @@ if (SPBLAS_GPU_BACKEND) add_subdirectory(rocsparse) endif() endif() + +if (ENABLE_SYCL_REFERENCE) + add_subdirectory(sycl_reference) +endif() diff --git a/examples/device/CMakeLists.txt b/examples/device/CMakeLists.txt index cb79234..bc87a80 100644 --- a/examples/device/CMakeLists.txt +++ b/examples/device/CMakeLists.txt @@ -14,3 +14,4 @@ function(add_device_example example_name) endfunction() add_device_example(device_spmv) +add_device_example(spmm_benchmark) diff --git a/examples/device/spmm_benchmark.cpp b/examples/device/spmm_benchmark.cpp new file mode 100644 index 0000000..7d59886 --- /dev/null +++ b/examples/device/spmm_benchmark.cpp @@ -0,0 +1,162 @@ +#include + +#include + +#include +#include +#include + +#include +#include + +int main(int argc, char** argv) { + using value_t = float; + using index_t = spblas::index_t; + using offset_t = spblas::offset_t; + namespace md = spblas::__mdspan; + + offset_t nnz_row = 100; + + index_t m = 100000; + index_t n = 1; + index_t k = 100000; + + if (argc >= 2) { + m = std::atoll(argv[1]); + } + + if (argc >= 3) { + k = std::atoll(argv[2]); + } + + if (argc >= 4) { + n = std::atoll(argv[3]); + } + + if (argc >= 5) { + nnz_row = std::atoll(argv[4]); + } + + fmt::print("Multiplying {} x {} matrix with {} nnz/row by {} columns.\n", m, + k, nnz_row, n); + + offset_t nnz_in = m * nnz_row; + + auto&& [values, rowptr, colind, shape, nnz] = + spblas::generate_csr(m, k, nnz_in); + + // Copy data to the GPU + thrust::device_vector d_values(values); + thrust::device_vector d_rowptr(rowptr); + thrust::device_vector d_colind(colind); + + spblas::csr_view a( + d_values.data().get(), d_rowptr.data().get(), d_colind.data().get(), + shape, nnz); + + std::vector b_values(k * n, 1); + std::vector c_values(m * n, 0); + + thrust::device_vector d_b(b_values); + thrust::device_vector d_c(c_values); + + md::mdspan b(d_b.data().get(), k, n); + md::mdspan c(d_c.data().get(), m, n); + + // Perform computation on the GPU. + spblas::multiply(thrust::device, a, b, c); + + // Copy the result back to the CPU. + thrust::copy(d_c.begin(), d_c.end(), c_values.begin()); + + std::vector c_ref(m * n, 0); + + spblas::csr_view a_view( + values.data(), rowptr.data(), colind.data(), shape, nnz); + md::mdspan b_view(b_values.data(), k, n); + md::mdspan c_view(c_ref.data(), m, n); + + // Perform reference computation on CPU. + spblas::multiply(a_view, b_view, c_view); + + // Compare results + const float epsilon = 64 * std::numeric_limits::epsilon(); + const float abs_th = std::numeric_limits::min(); + bool results_match = true; + + for (std::size_t i = 0; i < c_ref.size(); ++i) { + float diff = std::abs(c_ref[i] - c_values[i]); + float norm = std::min(std::abs(c_ref[i]) + std::abs(c_values[i]), + std::numeric_limits::max()); + float abs_error = std::max(abs_th, epsilon * norm); + + if (diff > abs_error) { + results_match = false; + std::cout << "Mismatch at index " << i << ": " + << "SYCL result = " << c_values[i] + << ", Reference = " << c_ref[i] << "\n"; + break; + } + } + + if (results_match) { + fmt::print("OK!\n"); + } else { + fmt::print("Error!\n"); + return 1; + } + + // Warmup: call `SpMM` repeatedly for at least 2 seconds. + + double min_warmup_duration = 2; + auto warmup_begin = std::chrono::high_resolution_clock::now(); + auto warmup_end = warmup_begin; + + while (std::chrono::duration(warmup_end - warmup_begin).count() < + min_warmup_duration) { + spblas::multiply(thrust::device, a, b, c); + warmup_end = std::chrono::high_resolution_clock::now(); + } + + double gb = 1e-9 * (nnz * sizeof(value_t) + nnz * sizeof(index_t) + + (m + 1) * sizeof(offset_t) + k * n * sizeof(value_t) + + m * n * sizeof(value_t)); + + double gflops = 1e-9 * 2 * nnz * n; + + double max_bw = 456; + + std::size_t n_iterations = 10; + + std::vector durations; + durations.reserve(n_iterations); + + for (std::size_t i = 0; i < n_iterations; i++) { + auto begin = std::chrono::high_resolution_clock::now(); + spblas::multiply(thrust::device, a, b, c); + auto end = std::chrono::high_resolution_clock::now(); + double duration = std::chrono::duration(end - begin).count(); + double gb_s = gb / duration; + double gflops_s = gflops / duration; + + fmt::print("Completed in {} s (achieved {} GB/s)\n", duration, gb_s); + fmt::print("Achieved {} GFLOPs\n", gflops_s); + + durations.push_back(duration); + } + + fmt::print("Durations: {}\n", durations); + + std::sort(durations.begin(), durations.end()); + + double median_duration = durations[durations.size() / 2]; + + double median_gb_s = gb / median_duration; + double median_gflops_s = gflops / median_duration; + + fmt::print("Median duration {} ({} GB/s) {}% of peak\n", median_duration, + median_gb_s, 100 * (median_gb_s / max_bw)); + fmt::print("Median achieved {} GFLOPs\n", median_gflops_s); + + return 0; +} diff --git a/examples/sycl_reference/CMakeLists.txt b/examples/sycl_reference/CMakeLists.txt new file mode 100644 index 0000000..3638fe7 --- /dev/null +++ b/examples/sycl_reference/CMakeLists.txt @@ -0,0 +1,7 @@ + +function(add_sycl_example example_name) + add_executable(${example_name} ${example_name}.cpp) + target_link_libraries(${example_name} spblas fmt sycl_thrust) +endfunction() + +add_sycl_example(sycl_spmm) diff --git a/examples/sycl_reference/sycl_spmm.cpp b/examples/sycl_reference/sycl_spmm.cpp new file mode 100644 index 0000000..96cf0a4 --- /dev/null +++ b/examples/sycl_reference/sycl_spmm.cpp @@ -0,0 +1,207 @@ +#include +#include + +#include + +#include + +#include + +#include +#include +#include + +int main(int argc, char** argv) { + using value_t = float; + using index_t = int32_t; + using offset_t = int32_t; + namespace md = spblas::__mdspan; + + offset_t nnz_row = 100; + + index_t m = 100000; + index_t n = 1; + index_t k = 100000; + + char method = 'k'; + + std::size_t wg_size = 32; + + if (argc >= 2) { + m = std::atoll(argv[1]); + } + + if (argc >= 3) { + k = std::atoll(argv[2]); + } + + if (argc >= 4) { + n = std::atoll(argv[3]); + } + + if (argc >= 5) { + nnz_row = std::atoll(argv[4]); + } + + if (argc >= 6) { + method = argv[5][0]; + } + + if (argc >= 7) { + wg_size = std::atoll(argv[6]); + } + + assert(method == 'k' || method == 'r' || method == 'j' || method == 's'); + + fmt::print("Multiplying {} x {} matrix with {} nnz/row by {} columns.\n", m, + k, nnz_row, n); + fmt::print("Using method {} with WG size {}\n", method, wg_size); + + offset_t nnz_in = m * nnz_row; + + auto&& [values, rowptr, colind, shape, nnz] = + spblas::generate_csr(m, k, nnz_in); + + thrust::device_vector d_values(values); + thrust::device_vector d_rowptr(rowptr); + thrust::device_vector d_colind(colind); + + spblas::csr_view a( + d_values.data().get(), d_rowptr.data().get(), d_colind.data().get(), + shape, nnz); + + std::vector b_values(k * n, 1); + std::vector c_values(m * n, 0); + + for (std::size_t k_ = 0; k_ < k; k_++) { + for (std::size_t j = 0; j < n; j++) { + b_values[k_ * n + j] = 10 * drand48(); + } + } + + thrust::device_vector d_b(b_values); + thrust::device_vector d_c(c_values); + + md::mdspan b(d_b.data().get(), k, n); + md::mdspan c(d_c.data().get(), m, n); + + sycl::queue q(sycl::gpu_selector_v); + + if (method == 'k') { + spblas::spmm_wgsplitk(q, a, b, c, wg_size); + } else if (method == 'r') { + spblas::spmm_wgsplitk_reorder(q, a, b, c, wg_size); + } else if (method == 'j') { + spblas::spmm_wgsplitj(q, a, b, c, wg_size); + } else if (method == 's') { + spblas::spmm_wgsplitk_smem(q, a, b, c, wg_size); + } + + thrust::copy(d_c.begin(), d_c.end(), c_values.begin()); + + std::vector c_ref(m * n, 0); + + spblas::csr_view a_view( + values.data(), rowptr.data(), colind.data(), shape, nnz); + md::mdspan b_view(b_values.data(), k, n); + md::mdspan c_view(c_ref.data(), m, n); + + spblas::multiply(a_view, b_view, c_view); + + // Compare results + const float epsilon = 64 * std::numeric_limits::epsilon(); + const float abs_th = std::numeric_limits::min(); + bool results_match = true; + + for (std::size_t i = 0; i < c_ref.size(); ++i) { + float diff = std::abs(c_ref[i] - c_values[i]); + float norm = std::min(std::abs(c_ref[i]) + std::abs(c_values[i]), + std::numeric_limits::max()); + float abs_error = std::max(abs_th, epsilon * norm); + + if (diff > abs_error) { + results_match = false; + std::cout << "Mismatch at index " << i << ": " + << "SYCL result = " << c_values[i] + << ", Reference = " << c_ref[i] << "\n"; + break; + } + } + + if (results_match) { + fmt::print("OK!\n"); + } else { + fmt::print("Error!\n"); + return 1; + } + + // Warmup: call `SpMM` repeatedly for at least 2 seconds. + + double min_warmup_duration = 2; + auto warmup_begin = std::chrono::high_resolution_clock::now(); + auto warmup_end = warmup_begin; + + while (std::chrono::duration(warmup_end - warmup_begin).count() < + min_warmup_duration) { + if (method == 'k') { + spblas::spmm_wgsplitk(q, a, b, c, wg_size); + } else if (method == 'r') { + spblas::spmm_wgsplitk_reorder(q, a, b, c, wg_size); + } else if (method == 'j') { + spblas::spmm_wgsplitj(q, a, b, c, wg_size); + } else if (method == 's') { + spblas::spmm_wgsplitk_smem(q, a, b, c, wg_size); + } + warmup_end = std::chrono::high_resolution_clock::now(); + } + + double gb = 1e-9 * (nnz * sizeof(value_t) + nnz * sizeof(index_t) + + (m + 1) * sizeof(offset_t) + k * n * sizeof(value_t) + + m * n * sizeof(value_t)); + + double gflops = 1e-9 * 2 * nnz * n; + + double max_bw = 456; + + std::size_t n_iterations = 10; + + std::vector durations; + durations.reserve(n_iterations); + + for (std::size_t i = 0; i < n_iterations; i++) { + auto begin = std::chrono::high_resolution_clock::now(); + if (method == 'k') { + spblas::spmm_wgsplitk(q, a, b, c, wg_size); + } else if (method == 'r') { + spblas::spmm_wgsplitk_reorder(q, a, b, c, wg_size); + } else if (method == 'j') { + spblas::spmm_wgsplitj(q, a, b, c, wg_size); + } else if (method == 's') { + spblas::spmm_wgsplitk_smem(q, a, b, c, wg_size); + } + auto end = std::chrono::high_resolution_clock::now(); + double duration = std::chrono::duration(end - begin).count(); + double gb_s = gb / duration; + double gflops_s = gflops / duration; + + fmt::print("Completed in {} s (achieved {} GB/s)\n", duration, gb_s); + fmt::print("Achieved {} GFLOPs\n", gflops_s); + + durations.push_back(duration); + } + + fmt::print("Durations: {}\n", durations); + + std::sort(durations.begin(), durations.end()); + + double median_duration = durations[durations.size() / 2]; + + double median_gb_s = gb / median_duration; + double median_gflops_s = gflops / median_duration; + + fmt::print("Median duration {} ({} GB/s) {}% of peak.\n", median_duration, + median_gb_s, 100 * (median_gb_s / max_bw)); + fmt::print("Median achieved {} GFLOPs\n", median_gflops_s); + + return 0; +} diff --git a/include/spblas/algorithms/algorithms.hpp b/include/spblas/algorithms/algorithms.hpp index b5fb92b..5724d75 100644 --- a/include/spblas/algorithms/algorithms.hpp +++ b/include/spblas/algorithms/algorithms.hpp @@ -8,6 +8,11 @@ #ifndef SPBLAS_VENDOR_BACKEND #include #include + +#ifdef SPBLAS_ENABLE_SYCL_REFERENCE +#include +#endif + #endif #include diff --git a/include/spblas/backend/sycl/multiply_impl.hpp b/include/spblas/backend/sycl/multiply_impl.hpp new file mode 100644 index 0000000..05ade7e --- /dev/null +++ b/include/spblas/backend/sycl/multiply_impl.hpp @@ -0,0 +1,3 @@ +#pragma once + +#include diff --git a/include/spblas/backend/sycl/spmm_impl.hpp b/include/spblas/backend/sycl/spmm_impl.hpp new file mode 100644 index 0000000..87156b7 --- /dev/null +++ b/include/spblas/backend/sycl/spmm_impl.hpp @@ -0,0 +1,225 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace spblas { + +// Optimizations: +// - Move reduction out of inner loop +// - LLC optimization (block __backend::shape(c)[1] into multiple kernels) +// * Copy optimization on B? +// * Transpose B? +// - L1 optimization: copy b to shared memory + +template + requires(__backend::row_iterable && __backend::lookupable && + __backend::lookupable) +void spmm_wgsplitk(sycl::queue& q, A&& a, B&& b, C&& c, + std::size_t wg_size = 32, std::size_t n_workgroups = 0) { + log_trace(""); + + if (n_workgroups == 0) { + n_workgroups = __backend::shape(a)[0]; + } + + q.parallel_for( + sycl::nd_range<1>{wg_size * n_workgroups, wg_size}, + [=](auto nd_idx) { + auto gid = nd_idx.get_group(0); + auto lid = nd_idx.get_local_id(0); + auto lsz = nd_idx.get_local_range(0); + + for (auto i = gid; i < __backend::shape(a)[0]; i += n_workgroups) { + auto row = __backend::lookup_row(a, i); + + for (auto elem_idx = lid; elem_idx < row.size(); elem_idx += lsz) { + auto&& [k, a_v] = row[elem_idx]; + + for (int j = 0; j < __backend::shape(c)[1]; j++) { + auto local_product = a_v * __backend::lookup(b, k, j); + auto group_sum = sycl::reduce_over_group( + nd_idx.get_group(), local_product, sycl::plus<>()); + if (lid == 0) { + __backend::lookup(c, i, j) += group_sum; + } + } + } + } + }) + .wait(); +} + +template + requires(__backend::row_iterable && __backend::lookupable && + __backend::lookupable) +void spmm_wgsplitk_reorder(sycl::queue& q, A&& a, B&& b, C&& c, + std::size_t wg_size = 32, + std::size_t n_workgroups = 0) { + log_trace(""); + + if (n_workgroups == 0) { + n_workgroups = __backend::shape(a)[0]; + } + + q.parallel_for( + sycl::nd_range<1>{wg_size * n_workgroups, wg_size}, + [=](auto nd_idx) { + auto gid = nd_idx.get_group(0); + auto lid = nd_idx.get_local_id(0); + auto lsz = nd_idx.get_local_range(0); + + for (auto i = gid; i < __backend::shape(a)[0]; i += n_workgroups) { + auto row = __backend::lookup_row(a, i); + + using T = std::remove_cvref_t(row[0]) * + __backend::lookup(b, 0, 0))>; + + for (int j = 0; j < __backend::shape(c)[1]; j++) { + T local_sum = 0; + for (auto elem_idx = lid; elem_idx < row.size(); elem_idx += lsz) { + auto&& [k, a_v] = row[elem_idx]; + + auto local_product = a_v * __backend::lookup(b, k, j); + local_sum += local_product; + } + + auto group_sum = sycl::reduce_over_group( + nd_idx.get_group(), local_sum, sycl::plus<>()); + if (lid == 0) { + __backend::lookup(c, i, j) += group_sum; + } + } + } + }) + .wait(); +} + +template + requires(__backend::row_iterable && __backend::lookupable && + __backend::lookupable) +void spmm_wgsplitk_smem(sycl::queue& q, A&& a, B&& b, C&& c, + std::size_t wg_size = 32, + std::size_t n_workgroups = 0) { + log_trace(""); + + if (n_workgroups == 0) { + n_workgroups = __backend::shape(a)[0]; + } + + using T = spblas::tensor_scalar_t; + + // We want each workgroup to use between + // 16 KiB (8 wg/Xe Core) -> 2 KiB (16 wg/Xe Core) + int j_bs = 2048 / sizeof(T); + + q.submit([&](auto&& h) { + sycl::local_accessor slm(j_bs, h); + + h.template parallel_for( + + sycl::nd_range<1>{wg_size * n_workgroups, wg_size}, [=](auto nd_idx) { + auto g = nd_idx.get_group(); + auto gid = nd_idx.get_group(0); + auto lid = nd_idx.get_local_id(0); + auto lsz = nd_idx.get_local_range(0); + + for (auto i = gid; i < __backend::shape(a)[0]; i += n_workgroups) { + auto row = __backend::lookup_row(a, i); + + for (int j_block = 0; j_block < __backend::shape(c)[1]; + j_block += j_bs) { + + // Initialize SLM + for (auto i = lid; i < slm.size(); i += lsz) { + slm[i] = 0; + } + + sycl::group_barrier(g); + + auto j_end = + std::min(j_block + j_bs, (int) __backend::shape(c)[1]); + + for (auto elem_idx = lid; elem_idx < row.size(); + elem_idx += lsz) { + auto&& [k, a_v] = row[elem_idx]; + + for (auto j_ = j_block; j_ < j_end; j_++) { + auto j = (j_ + lid) % j_end; + sycl::atomic_ref + c_ref(slm[j - j_block]); + c_ref += a_v * __backend::lookup(b, k, j); + // If WG size < SG size, atomics are unnecessary. + // slm[j - j_block] += a_v * __backend::lookup(b, k, j); + } + } + + sycl::group_barrier(g); + + for (auto j = lid; j < (j_end - j_block); j += lsz) { + __backend::lookup(c, i, j_block + j) = slm[j]; + } + + sycl::group_barrier(g); + } + } + }); + }).wait(); +} + +template + requires(__backend::row_iterable && __backend::lookupable && + __backend::lookupable) +void spmm_wgsplitj(sycl::queue& q, A&& a, B&& b, C&& c, + std::size_t wg_size = 32, std::size_t n_workgroups = 0) { + log_trace(""); + + if (n_workgroups == 0) { + n_workgroups = __backend::shape(a)[0]; + } + + int j_bs = __backend::shape(c)[1]; + // std::size_t j_bs = wg_size; + + auto max_slm_size = + q.get_device().get_info(); + + q.submit([&](auto&& h) { + // sycl::local_accessor slm(max_slm_size, h); + + h.template parallel_for( + sycl::nd_range<1>{wg_size * n_workgroups, wg_size}, [=](auto nd_idx) { + auto gid = nd_idx.get_group(0); + auto lid = nd_idx.get_local_id(0); + auto lsz = nd_idx.get_local_range(0); + + for (int j_block = 0; j_block < __backend::shape(c)[1]; + j_block += j_bs) { + + for (auto i = gid; i < __backend::shape(a)[0]; i += n_workgroups) { + auto row = __backend::lookup_row(a, i); + + for (auto&& [k, a_v] : row) { + for (auto j = j_block + lid; + j < + std::min(j_block + j_bs, (int) __backend::shape(c)[1]); + j += lsz) { + __backend::lookup(c, i, j) += + a_v * __backend::lookup(b, k, j); + } + } + } + } + }); + }).wait(); +} + +} // namespace spblas diff --git a/scripts/plot_results.py b/scripts/plot_results.py new file mode 100644 index 0000000..dbafcd7 --- /dev/null +++ b/scripts/plot_results.py @@ -0,0 +1,237 @@ +#!/usr/bin/env python3 +""" +Performance plotting script for SPMM benchmark results. +Parses MKL and SYCL experiment results and creates comparison plots. +""" + +import re +import os +import matplotlib.pyplot as plt +from collections import defaultdict +from dataclasses import dataclass +from typing import List, Dict, Tuple, Optional + +@dataclass +class ExperimentResult: + """Container for experiment parameters and results.""" + m: int + k: int + n: int + nnz_row: int + gb_per_s: float + gflops: float + method: Optional[str] = None # Only for SYCL + wg_size: Optional[int] = None # Only for SYCL + +def parse_mkl_results(filename: str) -> List[ExperimentResult]: + """Parse MKL benchmark results from output file.""" + results = [] + current_params = None + + with open(filename, 'r') as f: + for line in f: + line = line.strip() + + # Parse command line to extract parameters + if line.startswith('./spmm_benchmark'): + parts = line.split() + if len(parts) >= 5: + m = int(parts[1]) + k = int(parts[2]) + n = int(parts[3]) + nnz_row = int(parts[4]) + current_params = (m, k, n, nnz_row) + + # Parse median results + elif line.startswith('Median duration') and current_params: + # Extract GB/s from line like: "Median duration 0.00017339 (80.74285714285715 GB/s) 17.70% of peak" + gb_match = re.search(r'\(([0-9.]+) GB/s\)', line) + if gb_match: + gb_per_s = float(gb_match.group(1)) + + # Look for the corresponding GFLOPs line (should be next) + continue + + elif line.startswith('Median achieved') and current_params: + # Extract GFLOPs from line like: "Median achieved 18.455504931080224 GFLOPs" + gflops_match = re.search(r'Median achieved ([0-9.]+) GFLOPs', line) + if gflops_match and gb_per_s: + gflops = float(gflops_match.group(1)) + + m, k, n, nnz_row = current_params + results.append(ExperimentResult( + m=m, k=k, n=n, nnz_row=nnz_row, + gb_per_s=gb_per_s, gflops=gflops + )) + current_params = None # Reset for next experiment + + return results + +def parse_sycl_results(filename: str) -> List[ExperimentResult]: + """Parse SYCL benchmark results from output file.""" + results = [] + current_params = None + gb_per_s = None + + with open(filename, 'r') as f: + for line in f: + line = line.strip() + + # Parse command line to extract parameters + if line.startswith('./sycl_spmm'): + parts = line.split() + if len(parts) >= 7: + m = int(parts[1]) + k = int(parts[2]) + n = int(parts[3]) + nnz_row = int(parts[4]) + method = parts[5] + wg_size = int(parts[6]) + current_params = (m, k, n, nnz_row, method, wg_size) + + # Parse median results + elif line.startswith('Median duration') and current_params: + # Extract GB/s from line like: "Median duration 0.000161946 (86.44859397576971 GB/s) 18.95802499468634% of peak." + gb_match = re.search(r'\(([0-9.]+) GB/s\)', line) + if gb_match: + gb_per_s = float(gb_match.group(1)) + + elif line.startswith('Median achieved') and current_params and gb_per_s: + # Extract GFLOPs from line like: "Median achieved 19.759672977412226 GFLOPs" + gflops_match = re.search(r'Median achieved ([0-9.]+) GFLOPs', line) + if gflops_match: + gflops = float(gflops_match.group(1)) + + m, k, n, nnz_row, method, wg_size = current_params + results.append(ExperimentResult( + m=m, k=k, n=n, nnz_row=nnz_row, + gb_per_s=gb_per_s, gflops=gflops, + method=method, wg_size=wg_size + )) + current_params = None # Reset for next experiment + gb_per_s = None + + return results + +def find_best_sycl_performance(sycl_results: List[ExperimentResult]) -> Dict[Tuple[int, int, int, int], Tuple[ExperimentResult, str]]: + """ + Find the best SYCL performance for each (m, k, n, nnz_row) combination. + Returns dict mapping parameters to (best_result, "method-wg_size" label). + """ + # Group by parameters + param_groups = defaultdict(list) + for result in sycl_results: + key = (result.m, result.k, result.n, result.nnz_row) + param_groups[key].append(result) + + # Find best performance for each parameter combination + best_results = {} + for params, results in param_groups.items(): + best_result = max(results, key=lambda r: r.gb_per_s) + label = f"{best_result.method}-{best_result.wg_size}" + best_results[params] = (best_result, label) + + return best_results + +def create_plots(mkl_results: List[ExperimentResult], sycl_results: List[ExperimentResult]): + """Create performance plots for each n value.""" + # Get all unique n values + all_n_values = set() + for result in mkl_results + sycl_results: + all_n_values.add(result.n) + + # Find best SYCL performance for each parameter combination + best_sycl = find_best_sycl_performance(sycl_results) + + # Create plots for each n value + for n in sorted(all_n_values): + fig, ax = plt.subplots(figsize=(10, 6)) + + # Filter results for this n value + mkl_n_results = [r for r in mkl_results if r.n == n] + sycl_n_results = [(params, result, label) for params, (result, label) in best_sycl.items() if params[2] == n] + + if not mkl_n_results and not sycl_n_results: + continue + + # Prepare MKL data + mkl_nnz_rows = [r.nnz_row for r in mkl_n_results] + mkl_gb_per_s = [r.gb_per_s for r in mkl_n_results] + + # Prepare SYCL data + sycl_nnz_rows = [params[3] for params, result, label in sycl_n_results] + sycl_gb_per_s = [result.gb_per_s for params, result, label in sycl_n_results] + sycl_labels = [label for params, result, label in sycl_n_results] + + # Plot lines + if mkl_nnz_rows: + # Sort MKL data by nnz_row for proper line plotting + mkl_sorted = sorted(zip(mkl_nnz_rows, mkl_gb_per_s)) + mkl_nnz_rows_sorted, mkl_gb_per_s_sorted = zip(*mkl_sorted) + ax.plot(mkl_nnz_rows_sorted, mkl_gb_per_s_sorted, '-o', label='MKL', markerfacecolor='white') + + if sycl_nnz_rows: + # Sort SYCL data by nnz_row for proper line plotting + sycl_sorted = sorted(zip(sycl_nnz_rows, sycl_gb_per_s, sycl_labels)) + sycl_nnz_rows_sorted, sycl_gb_per_s_sorted, sycl_labels_sorted = zip(*sycl_sorted) + ax.plot(sycl_nnz_rows_sorted, sycl_gb_per_s_sorted, '-s', label='SYCL', markerfacecolor='white') + + # Add annotations for SYCL points + for x, y, label in zip(sycl_nnz_rows_sorted, sycl_gb_per_s_sorted, sycl_labels_sorted): + ax.annotate(label, (x, y), textcoords="offset points", xytext=(0,10), ha='center', fontsize=8) + + # Customize plot + ax.set_xlabel('nnz_row') + ax.set_ylabel('GB/s') + ax.set_title(f'SPMM Performance Comparison (n={n})') + ax.legend() + ax.grid(True, alpha=0.3) + + # Set x-axis to log scale if there's a wide range + if mkl_nnz_rows or sycl_nnz_rows: + all_nnz = (mkl_nnz_rows or []) + (sycl_nnz_rows or []) + if max(all_nnz) / min(all_nnz) > 10: + ax.set_xscale('log') + + plt.tight_layout() + + # Save plot + output_filename = f'spmm_performance_n{n}.png' + plt.savefig(output_filename, dpi=300, bbox_inches='tight') + print(f"Saved plot: {output_filename}") + + plt.show() + +def main(): + """Main function to parse data and create plots.""" + script_dir = os.path.dirname(os.path.abspath(__file__)) + + mkl_file = os.path.join(script_dir, 'spmm_experiments_mkl.out') + sycl_file = os.path.join(script_dir, 'spmm_experiments_sycl.out') + + # Check if files exist + if not os.path.exists(mkl_file): + print(f"Error: MKL results file not found: {mkl_file}") + return + + if not os.path.exists(sycl_file): + print(f"Error: SYCL results file not found: {sycl_file}") + return + + # Parse results + print("Parsing MKL results...") + mkl_results = parse_mkl_results(mkl_file) + print(f"Found {len(mkl_results)} MKL results") + + print("Parsing SYCL results...") + sycl_results = parse_sycl_results(sycl_file) + print(f"Found {len(sycl_results)} SYCL results") + + # Create plots + print("Creating plots...") + create_plots(mkl_results, sycl_results) + + print("Done!") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/spmm_experiments_mkl.out b/scripts/spmm_experiments_mkl.out new file mode 100644 index 0000000..7b15c55 --- /dev/null +++ b/scripts/spmm_experiments_mkl.out @@ -0,0 +1,733 @@ +[level_zero:gpu][level_zero:0] Intel(R) oneAPI Unified Runtime over Level-Zero, Intel(R) Arc(TM) B580 Graphics 20.1.0 [1.13.0] +[opencl:cpu][opencl:0] Intel(R) OpenCL, Intel(R) Core(TM) Ultra 9 285K OpenCL 3.0 (Build 0) [2025.19.4.0.18_160000.xmain-hotfix] +[opencl:gpu][opencl:1] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) B580 Graphics OpenCL 3.0 NEO [25.37.0] +Starting SPMM benchmark tests at Fri Sep 19 08:48:13 PM PDT 2025 +./spmm_benchmark 100000 100000 1 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 1 columns. +OK! +Completed in 0.000172499 s (achieved 81.15991397051577 GB/s) +Achieved 18.55083217873727 GFLOPs +Completed in 0.00017339000000000002 s (achieved 80.74285714285715 GB/s) +Achieved 18.455504931080224 GFLOPs +Completed in 0.00017273400000000002 s (achieved 81.04949807218034 GB/s) +Achieved 18.525594266328575 GFLOPs +Completed in 0.00017025000000000002 s (achieved 82.23203524229075 GB/s) +Achieved 18.79588839941263 GFLOPs +Completed in 0.000168794 s (achieved 82.9413604748984 GB/s) +Achieved 18.958019834828253 GFLOPs +Completed in 0.000173627 s (achieved 80.63264354046318 GB/s) +Achieved 18.430313257730653 GFLOPs +Completed in 0.00025067000000000004 s (achieved 55.850337096581164 GB/s) +Achieved 12.765787688993496 GFLOPs +Completed in 0.000174804 s (achieved 80.08972334729184 GB/s) +Achieved 18.306217249033203 GFLOPs +Completed in 0.00017100600000000002 s (achieved 81.86849584225115 GB/s) +Achieved 18.712793703144918 GFLOPs +Completed in 0.00017350200000000002 s (achieved 80.69073555348065 GB/s) +Achieved 18.443591428340884 GFLOPs +Durations: [0.000172499, 0.00017339000000000002, 0.00017273400000000002, 0.00017025000000000002, 0.000168794, 0.000173627, 0.00025067000000000004, 0.000174804, 0.00017100600000000002, 0.00017350200000000002] +Median duration 0.00017339000000000002 (80.74285714285715 GB/s) 17.706766917293233% of peak +Median achieved 18.455504931080224 GFLOPs +./spmm_benchmark 100000 100000 8 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 8 columns. +OK! +Completed in 0.00036034900000000004 s (achieved 54.391725799155815 GB/s) +Achieved 71.04223960660359 GFLOPs +Completed in 0.000359741 s (achieved 54.48365351739168 GB/s) +Achieved 71.16230843857109 GFLOPs +Completed in 0.000357537 s (achieved 54.81951238613067 GB/s) +Achieved 71.60098115719492 GFLOPs +Completed in 0.00035554900000000003 s (achieved 55.12602763613454 GB/s) +Achieved 72.00132752447622 GFLOPs +Completed in 0.000354574 s (achieved 55.277612007648614 GB/s) +Achieved 72.19931523461958 GFLOPs +Completed in 0.000365477 s (achieved 53.62855665335986 GB/s) +Achieved 70.04544745633788 GFLOPs +Completed in 0.000361198 s (achieved 54.263877430107584 GB/s) +Achieved 70.87525401580297 GFLOPs +Completed in 0.000359207 s (achieved 54.564649352601705 GB/s) +Achieved 71.26809889562286 GFLOPs +Completed in 0.00035852000000000003 s (achieved 54.66920673881513 GB/s) +Achieved 71.4046636170925 GFLOPs +Completed in 0.000355724 s (achieved 55.0989081422676 GB/s) +Achieved 71.96590615196051 GFLOPs +Durations: [0.00036034900000000004, 0.000359741, 0.000357537, 0.00035554900000000003, 0.000354574, 0.000365477, 0.000361198, 0.000359207, 0.00035852000000000003, 0.000355724] +Median duration 0.000359207 (54.564649352601705 GB/s) 11.965931875570549% of peak +Median achieved 71.26809889562286 GFLOPs +./spmm_benchmark 100000 100000 32 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 32 columns. +OK! +Completed in 0.00036427400000000004 s (achieved 106.51324003360106 GB/s) +Achieved 281.1070787374339 GFLOPs +Completed in 0.00035559800000000005 s (achieved 109.11198600667043 GB/s) +Achieved 287.9656241036226 GFLOPs +Completed in 0.000428352 s (achieved 90.57971948304198 GB/s) +Achieved 239.05572986702526 GFLOPs +Completed in 0.000353547 s (achieved 109.74496743007295 GB/s) +Achieved 289.63617284264893 GFLOPs +Completed in 0.000355062 s (achieved 109.27670097053472 GB/s) +Achieved 288.4003357160158 GFLOPs +Completed in 0.000352584 s (achieved 110.04470991309871 GB/s) +Achieved 290.4272457059878 GFLOPs +Completed in 0.00035423100000000004 s (achieved 109.53305611310132 GB/s) +Achieved 289.07690179572086 GFLOPs +Completed in 0.00035504700000000004 s (achieved 109.28131768470088 GB/s) +Achieved 288.41252003255903 GFLOPs +Completed in 0.000354061 s (achieved 109.58564767088156 GB/s) +Achieved 289.2157001194709 GFLOPs +Completed in 0.00036189800000000004 s (achieved 107.21254055009973 GB/s) +Achieved 282.95265516803073 GFLOPs +Durations: [0.00036427400000000004, 0.00035559800000000005, 0.000428352, 0.000353547, 0.000355062, 0.000352584, 0.00035423100000000004, 0.00035504700000000004, 0.000354061, 0.00036189800000000004] +Median duration 0.000355062 (109.27670097053472 GB/s) 23.964188809327787% of peak +Median achieved 288.4003357160158 GFLOPs +./spmm_benchmark 100000 100000 64 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 64 columns. +OK! +Completed in 0.00036197100000000005 s (achieved 177.91481638031777 GB/s) +Achieved 565.791182166527 GFLOPs +Completed in 0.00045433200000000005 s (achieved 141.74657299067644 GB/s) +Achieved 450.77168238204655 GFLOPs +Completed in 0.00036095 s (achieved 178.41807452555759 GB/s) +Achieved 567.3916054855243 GFLOPs +Completed in 0.000369464 s (achieved 174.30657384751967 GB/s) +Achieved 554.3165233960549 GFLOPs +Completed in 0.000366414 s (achieved 175.75748743224878 GB/s) +Achieved 558.9306085466167 GFLOPs +Completed in 0.00036826700000000003 s (achieved 174.87313280853294 GB/s) +Achieved 556.1182511601637 GFLOPs +Completed in 0.000361071 s (achieved 178.35828410478828 GB/s) +Achieved 567.2014645319065 GFLOPs +Completed in 0.00036341000000000005 s (achieved 177.2103244269558 GB/s) +Achieved 563.5508103794612 GFLOPs +Completed in 0.00036020800000000005 s (achieved 178.78560165237863 GB/s) +Achieved 568.5603873317639 GFLOPs +Completed in 0.00036163700000000005 s (achieved 178.07913460182448 GB/s) +Achieved 566.3137344906633 GFLOPs +Durations: [0.00036197100000000005, 0.00045433200000000005, 0.00036095, 0.000369464, 0.000366414, 0.00036826700000000003, 0.000361071, 0.00036341000000000005, 0.00036020800000000005, 0.00036163700000000005] +Median duration 0.00036341000000000005 (177.2103244269558 GB/s) 38.861913251525394% of peak +Median achieved 563.5508103794612 GFLOPs +./spmm_benchmark 100000 100000 128 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 128 columns. +OK! +Completed in 0.000648247 s (achieved 178.32709445627978 GB/s) +Achieved 631.8579183551949 GFLOPs +Completed in 0.0007556700000000001 s (achieved 152.9768338031151 GB/s) +Achieved 542.0355446160361 GFLOPs +Completed in 0.0006593450000000001 s (achieved 175.32551850700315 GB/s) +Achieved 621.2225769513683 GFLOPs +Completed in 0.000773442 s (achieved 149.46176183863818 GB/s) +Achieved 529.5807571867057 GFLOPs +Completed in 0.000644647 s (achieved 179.32295349237646 GB/s) +Achieved 635.3864983471574 GFLOPs +Completed in 0.000806904 s (achieved 143.26363978862418 GB/s) +Achieved 507.6192459078156 GFLOPs +Completed in 0.000666845 s (achieved 173.35363390293097 GB/s) +Achieved 614.2356919524027 GFLOPs +Completed in 0.0008024680000000001 s (achieved 144.0555934940708 GB/s) +Achieved 510.42533783278583 GFLOPs +Completed in 0.0006465560000000001 s (achieved 178.79349043238327 GB/s) +Achieved 633.5104770507118 GFLOPs +Completed in 0.000745857 s (achieved 154.9895006683587 GB/s) +Achieved 549.1669314627335 GFLOPs +Durations: [0.000648247, 0.0007556700000000001, 0.0006593450000000001, 0.000773442, 0.000644647, 0.000806904, 0.000666845, 0.0008024680000000001, 0.0006465560000000001, 0.000745857] +Median duration 0.000745857 (154.9895006683587 GB/s) 33.98892558516638% of peak +Median achieved 549.1669314627335 GFLOPs +./spmm_benchmark 100000 100000 256 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 256 columns. +OK! +Completed in 0.0012460840000000002 s (achieved 174.9480805467368 GB/s) +Achieved 657.4195640101309 GFLOPs +Completed in 0.001263746 s (achieved 172.50302196802207 GB/s) +Achieved 648.2315275379705 GFLOPs +Completed in 0.001253409 s (achieved 173.92567310430994 GB/s) +Achieved 653.5775632694516 GFLOPs +Completed in 0.001236609 s (achieved 176.28854714788588 GB/s) +Achieved 662.4567668519313 GFLOPs +Completed in 0.001237852 s (achieved 176.11152544892283 GB/s) +Achieved 661.7915550485842 GFLOPs +Completed in 0.001240931 s (achieved 175.67455724774385 GB/s) +Achieved 660.1495167740994 GFLOPs +Completed in 0.0012402320000000002 s (achieved 175.77356817111635 GB/s) +Achieved 660.5215798334505 GFLOPs +Completed in 0.001239543 s (achieved 175.87127191230962 GB/s) +Achieved 660.8887307660968 GFLOPs +Completed in 0.0012435640000000002 s (achieved 175.30260123322964 GB/s) +Achieved 658.7517811708926 GFLOPs +Completed in 0.0012395070000000002 s (achieved 175.87637988329232 GB/s) +Achieved 660.9079254897309 GFLOPs +Durations: [0.0012460840000000002, 0.001263746, 0.001253409, 0.001236609, 0.001237852, 0.001240931, 0.0012402320000000002, 0.001239543, 0.0012435640000000002, 0.0012395070000000002] +Median duration 0.001240931 (175.67455724774385 GB/s) 38.5251222034526% of peak +Median achieved 660.1495167740994 GFLOPs +./spmm_benchmark 100000 100000 512 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 512 columns. +OK! +Completed in 0.002828951 s (achieved 149.45469327676582 GB/s) +Achieved 579.1546053643206 GFLOPs +Completed in 0.0029760790000000004 s (achieved 142.06612257268708 GB/s) +Achieved 550.5230203902516 GFLOPs +Completed in 0.0028174240000000002 s (achieved 150.06616114578424 GB/s) +Achieved 581.5241156460653 GFLOPs +Completed in 0.00295793 s (achieved 142.9377990689435 GB/s) +Achieved 553.9008698650746 GFLOPs +Completed in 0.0028642510000000004 s (achieved 147.6127629876013 GB/s) +Achieved 572.0169077360887 GFLOPs +Completed in 0.0030087020000000003 s (achieved 140.5257164052804 GB/s) +Achieved 544.5537643807861 GFLOPs +Completed in 0.003080591 s (achieved 137.2463933057001 GB/s) +Achieved 531.8459996799315 GFLOPs +Completed in 0.0028762690000000003 s (achieved 146.99598820555377 GB/s) +Achieved 569.6268325389593 GFLOPs +Completed in 0.0030587630000000004 s (achieved 138.22581350696342 GB/s) +Achieved 535.6413687493931 GFLOPs +Completed in 0.003094963 s (achieved 136.60906576266018 GB/s) +Achieved 529.3762801041563 GFLOPs +Durations: [0.002828951, 0.0029760790000000004, 0.0028174240000000002, 0.00295793, 0.0028642510000000004, 0.0030087020000000003, 0.003080591, 0.0028762690000000003, 0.0030587630000000004, 0.003094963] +Median duration 0.0029760790000000004 (142.06612257268708 GB/s) 31.154851441378742% of peak +Median achieved 550.5230203902516 GFLOPs +./spmm_benchmark 100000 100000 1 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 1 columns. +OK! +Completed in 0.00024748900000000004 s (achieved 108.28765722921018 GB/s) +Achieved 25.859735180149418 GFLOPs +Completed in 0.000250838 s (achieved 106.84188201149746 GB/s) +Achieved 25.51447547819708 GFLOPs +Completed in 0.000250199 s (achieved 107.11475265688513 GB/s) +Achieved 25.579638607668297 GFLOPs +Completed in 0.00024987 s (achieved 107.25578901028535 GB/s) +Achieved 25.613318925841437 GFLOPs +Completed in 0.00025352 s (achieved 105.71159671820763 GB/s) +Achieved 25.244556642473967 GFLOPs +Completed in 0.00033307000000000003 s (achieved 80.46357822679917 GB/s) +Achieved 19.215179992193832 GFLOPs +Completed in 0.00024741 s (achieved 108.32223434784368 GB/s) +Achieved 25.867992401277235 GFLOPs +Completed in 0.000251607 s (achieved 106.51533542389521 GB/s) +Achieved 25.436494215184794 GFLOPs +Completed in 0.00025110700000000003 s (achieved 106.72742695345012 GB/s) +Achieved 25.487142931101083 GFLOPs +Completed in 0.000244354 s (achieved 109.67696047537589 GB/s) +Achieved 26.19150904016304 GFLOPs +Durations: [0.00024748900000000004, 0.000250838, 0.000250199, 0.00024987, 0.00025352, 0.00033307000000000003, 0.00024741, 0.000251607, 0.00025110700000000003, 0.000244354] +Median duration 0.000250838 (106.84188201149746 GB/s) 23.430237283223125% of peak +Median achieved 25.51447547819708 GFLOPs +./spmm_benchmark 100000 100000 8 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 8 columns. +OK! +Completed in 0.000348965 s (achieved 92.84599888240942 GB/s) +Achieved 146.71958505867352 GFLOPs +Completed in 0.00034681100000000004 s (achieved 93.4226538373927 GB/s) +Achieved 147.63084215898573 GFLOPs +Completed in 0.00034835700000000004 s (achieved 93.0080463432628 GB/s) +Achieved 146.9756600269264 GFLOPs +Completed in 0.000429715 s (achieved 75.39882014823779 GB/s) +Achieved 119.14873811712414 GFLOPs +Completed in 0.000347797 s (achieved 93.1578018211773 GB/s) +Achieved 147.2123106294764 GFLOPs +Completed in 0.00044350700000000005 s (achieved 73.05409835695941 GB/s) +Achieved 115.44349920068905 GFLOPs +Completed in 0.000348769 s (achieved 92.89817615671117 GB/s) +Achieved 146.8020380251685 GFLOPs +Completed in 0.000351096 s (achieved 92.28246405541505 GB/s) +Achieved 145.82906099756192 GFLOPs +Completed in 0.000347147 s (achieved 93.332231014527 GB/s) +Achieved 147.48795178987575 GFLOPs +Completed in 0.00034685300000000005 s (achieved 93.41134140399535 GB/s) +Achieved 147.61296572323144 GFLOPs +Durations: [0.000348965, 0.00034681100000000004, 0.00034835700000000004, 0.000429715, 0.000347797, 0.00044350700000000005, 0.000348769, 0.000351096, 0.000347147, 0.00034685300000000005] +Median duration 0.000348769 (92.89817615671117 GB/s) 20.372407051910344% of peak +Median achieved 146.8020380251685 GFLOPs +./spmm_benchmark 100000 100000 32 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 32 columns. +OK! +Completed in 0.000516547 s (achieved 99.89411224922418 GB/s) +Achieved 396.4789264094071 GFLOPs +Completed in 0.000387581 s (achieved 133.1334714549991 GB/s) +Achieved 528.4056752007967 GFLOPs +Completed in 0.000368907 s (achieved 139.87266167353832 GB/s) +Achieved 555.1534668629221 GFLOPs +Completed in 0.00036733400000000003 s (achieved 140.47162527835704 GB/s) +Achieved 557.5307485830334 GFLOPs +Completed in 0.00037172700000000003 s (achieved 138.81155794440545 GB/s) +Achieved 550.9419547140778 GFLOPs +Completed in 0.000376275 s (achieved 137.13375589661817 GB/s) +Achieved 544.2827719088433 GFLOPs +Completed in 0.00037906800000000004 s (achieved 136.12334462418352 GB/s) +Achieved 540.2724577120728 GFLOPs +Completed in 0.00038137200000000005 s (achieved 135.30097647441343 GB/s) +Achieved 537.0084851536033 GFLOPs +Completed in 0.00047037 s (achieved 109.70088228415929 GB/s) +Achieved 435.4019176393052 GFLOPs +Completed in 0.000377709 s (achieved 136.61311750580475 GB/s) +Achieved 542.2163623318481 GFLOPs +Durations: [0.000516547, 0.000387581, 0.000368907, 0.00036733400000000003, 0.00037172700000000003, 0.000376275, 0.00037906800000000004, 0.00038137200000000005, 0.00047037, 0.000377709] +Median duration 0.00037906800000000004 (136.12334462418352 GB/s) 29.851610663198137% of peak +Median achieved 540.2724577120728 GFLOPs +./spmm_benchmark 100000 100000 64 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 64 columns. +OK! +Completed in 0.0007425740000000001 s (achieved 103.9627080937388 GB/s) +Achieved 551.5948578862174 GFLOPs +Completed in 0.00056795 s (achieved 135.92746544590193 GB/s) +Achieved 721.1902456202131 GFLOPs +Completed in 0.00065134 s (achieved 118.52489329689563 GB/s) +Achieved 628.8574323701907 GFLOPs +Completed in 0.000562436 s (achieved 137.26006870115 GB/s) +Achieved 728.2606376547732 GFLOPs +Completed in 0.0005677960000000001 s (achieved 135.96433226017794 GB/s) +Achieved 721.3858498474804 GFLOPs +Completed in 0.000576157 s (achieved 133.99126279816093 GB/s) +Achieved 710.9173367675825 GFLOPs +Completed in 0.0006736 s (achieved 114.60808194774347 GB/s) +Achieved 608.0760095011876 GFLOPs +Completed in 0.000573191 s (achieved 134.68460600393237 GB/s) +Achieved 714.5960072645942 GFLOPs +Completed in 0.0005753090000000001 s (achieved 134.18876464647693 GB/s) +Achieved 711.9652221675656 GFLOPs +Completed in 0.0005653100000000001 s (achieved 136.5622472625639 GB/s) +Achieved 724.5582070014682 GFLOPs +Durations: [0.0007425740000000001, 0.00056795, 0.00065134, 0.000562436, 0.0005677960000000001, 0.000576157, 0.0006736, 0.000573191, 0.0005753090000000001, 0.0005653100000000001] +Median duration 0.0005753090000000001 (134.18876464647693 GB/s) 29.427360668087044% of peak +Median achieved 711.9652221675656 GFLOPs +./spmm_benchmark 100000 100000 128 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 128 columns. +OK! +Completed in 0.000892056 s (achieved 143.9371564117051 GB/s) +Achieved 918.3279973454581 GFLOPs +Completed in 0.000890527 s (achieved 144.18429087495383 GB/s) +Achieved 919.9047305696515 GFLOPs +Completed in 0.000896346 s (achieved 143.248259042825 GB/s) +Achieved 913.9327893469709 GFLOPs +Completed in 0.000889839 s (achieved 144.29577035845813 GB/s) +Achieved 920.6159765980138 GFLOPs +Completed in 0.000886161 s (achieved 144.89466812464102 GB/s) +Achieved 924.4369815417289 GFLOPs +Completed in 0.0008907860000000001 s (achieved 144.14236864970937 GB/s) +Achieved 919.6372641689474 GFLOPs +Completed in 0.000890569 s (achieved 144.17749101978623 GB/s) +Achieved 919.8613470713667 GFLOPs +Completed in 0.0008824140000000001 s (achieved 145.50993524581432 GB/s) +Achieved 928.3624239869267 GFLOPs +Completed in 0.0008869860000000001 s (achieved 144.7598992543287 GB/s) +Achieved 923.577147779108 GFLOPs +Completed in 0.000900714 s (achieved 142.5535786054175 GB/s) +Achieved 909.5006850121126 GFLOPs +Durations: [0.000892056, 0.000890527, 0.000896346, 0.000889839, 0.000886161, 0.0008907860000000001, 0.000890569, 0.0008824140000000001, 0.0008869860000000001, 0.000900714] +Median duration 0.000890569 (144.17749101978623 GB/s) 31.617870837672417% of peak +Median achieved 919.8613470713667 GFLOPs +./spmm_benchmark 100000 100000 256 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 256 columns. +OK! +Completed in 0.002392687 s (achieved 96.46059179491509 GB/s) +Achieved 684.7531666281465 GFLOPs +Completed in 0.002123438 s (achieved 108.69166135295686 GB/s) +Achieved 771.5789205995184 GFLOPs +Completed in 0.00237154 s (achieved 97.32072998979565 GB/s) +Achieved 690.859104210766 GFLOPs +Completed in 0.00226042 s (achieved 102.10492032454145 GB/s) +Achieved 724.8210509551323 GFLOPs +Completed in 0.002383524 s (achieved 96.83141600420218 GB/s) +Achieved 687.3855685950718 GFLOPs +Completed in 0.0021488930000000002 s (achieved 107.4041397128661 GB/s) +Achieved 762.4390790979355 GFLOPs +Completed in 0.002254691 s (achieved 102.364361236196 GB/s) +Achieved 726.6627666496207 GFLOPs +Completed in 0.002078102 s (achieved 111.0628852674219 GB/s) +Achieved 788.4117333990343 GFLOPs +Completed in 0.002293013 s (achieved 100.65359594559648 GB/s) +Achieved 714.5184087486639 GFLOPs +Completed in 0.0021297110000000003 s (achieved 108.37151331800416 GB/s) +Achieved 769.3062579852383 GFLOPs +Durations: [0.002392687, 0.002123438, 0.00237154, 0.00226042, 0.002383524, 0.0021488930000000002, 0.002254691, 0.002078102, 0.002293013, 0.0021297110000000003] +Median duration 0.00226042 (102.10492032454145 GB/s) 22.391429895732774% of peak +Median achieved 724.8210509551323 GFLOPs +./spmm_benchmark 100000 100000 512 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 512 columns. +OK! +Completed in 0.004999063000000001 s (achieved 87.13633014826978 GB/s) +Achieved 655.4828374837444 GFLOPs +Completed in 0.004903583 s (achieved 88.83300313260732 GB/s) +Achieved 668.246055996197 GFLOPs +Completed in 0.005049919 s (achieved 86.25881009180544 GB/s) +Achieved 648.8816949341168 GFLOPs +Completed in 0.004891077000000001 s (achieved 89.06014033310046 GB/s) +Achieved 669.9546950497814 GFLOPs +Completed in 0.005029932 s (achieved 86.60156916634261 GB/s) +Achieved 651.4600992617793 GFLOPs +Completed in 0.004993139000000001 s (achieved 87.2397111316148 GB/s) +Achieved 656.260520686486 GFLOPs +Completed in 0.00498043 s (achieved 87.46232835317433 GB/s) +Achieved 657.935158209231 GFLOPs +Completed in 0.004938944000000001 s (achieved 88.19699190758186 GB/s) +Achieved 663.4616630599577 GFLOPs +Completed in 0.004900495 s (achieved 88.88898039891889 GB/s) +Achieved 668.6671448496529 GFLOPs +Completed in 0.004985417000000001 s (achieved 87.37483825324942 GB/s) +Achieved 657.2770141394391 GFLOPs +Durations: [0.004999063000000001, 0.004903583, 0.005049919, 0.004891077000000001, 0.005029932, 0.004993139000000001, 0.00498043, 0.004938944000000001, 0.004900495, 0.004985417000000001] +Median duration 0.004985417000000001 (87.37483825324942 GB/s) 19.16114873974768% of peak +Median achieved 657.2770141394391 GFLOPs +./spmm_benchmark 100000 100000 1 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 1 columns. +OK! +Completed in 0.000546019 s (achieved 95.96736377305551 GB/s) +Achieved 23.44240768178397 GFLOPs +Completed in 0.000539678 s (achieved 97.09494179862807 GB/s) +Achieved 23.717846567768188 GFLOPs +Completed in 0.0005472060000000001 s (achieved 95.75919123693818 GB/s) +Achieved 23.391556379133267 GFLOPs +Completed in 0.00055082 s (achieved 95.13090301731962 GB/s) +Achieved 23.238081405903923 GFLOPs +Completed in 0.0005466690000000001 s (achieved 95.85325672390421 GB/s) +Achieved 23.414534206256434 GFLOPs +Completed in 0.000540546 s (achieved 96.939028315814 GB/s) +Achieved 23.67976083441557 GFLOPs +Completed in 0.000544888 s (achieved 96.16655899928058 GB/s) +Achieved 23.491066053941363 GFLOPs +Completed in 0.0006226340000000001 s (achieved 84.15859718550544 GB/s) +Achieved 20.557823697388834 GFLOPs +Completed in 0.000542054 s (achieved 96.66934290679525 GB/s) +Achieved 23.613883487623003 GFLOPs +Completed in 0.000546882 s (achieved 95.81592372760485 GB/s) +Achieved 23.405414696406172 GFLOPs +Durations: [0.000546019, 0.000539678, 0.0005472060000000001, 0.00055082, 0.0005466690000000001, 0.000540546, 0.000544888, 0.0006226340000000001, 0.000542054, 0.000546882] +Median duration 0.0005466690000000001 (95.85325672390421 GB/s) 21.020451035943903% of peak +Median achieved 23.414534206256434 GFLOPs +./spmm_benchmark 100000 100000 8 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 8 columns. +OK! +Completed in 0.00040535 s (achieved 143.08623165165906 GB/s) +Achieved 252.62119156284692 GFLOPs +Completed in 0.000599274 s (achieved 96.78378170920146 GB/s) +Achieved 170.8734235091127 GFLOPs +Completed in 0.000517663 s (achieved 112.04201188804299 GB/s) +Achieved 197.81209010495246 GFLOPs +Completed in 0.0005904170000000001 s (achieved 98.23566055855436 GB/s) +Achieved 173.4367404732587 GFLOPs +Completed in 0.000489022 s (achieved 118.6040791620827 GB/s) +Achieved 209.39753221736447 GFLOPs +Completed in 0.000407233 s (achieved 142.42461686552906 GB/s) +Achieved 251.45309933134104 GFLOPs +Completed in 0.00040496300000000005 s (achieved 143.2229709874729 GB/s) +Achieved 252.8626072011517 GFLOPs +Completed in 0.000587942 s (achieved 98.64919328777327 GB/s) +Achieved 174.1668395862177 GFLOPs +Completed in 0.00040745900000000004 s (achieved 142.34562005011546 GB/s) +Achieved 251.31362910133288 GFLOPs +Completed in 0.00040414700000000005 s (achieved 143.51214780760463 GB/s) +Achieved 253.3731538276914 GFLOPs +Durations: [0.00040535, 0.000599274, 0.000517663, 0.0005904170000000001, 0.000489022, 0.000407233, 0.00040496300000000005, 0.000587942, 0.00040745900000000004, 0.00040414700000000005] +Median duration 0.000489022 (118.6040791620827 GB/s) 26.00966648291287% of peak +Median achieved 209.39753221736447 GFLOPs +./spmm_benchmark 100000 100000 32 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 32 columns. +OK! +Completed in 0.0005836390000000001 s (achieved 132.273552658407 GB/s) +Achieved 701.8036834413053 GFLOPs +Completed in 0.00059123 s (achieved 130.57524821135598 GB/s) +Achieved 692.7929908834126 GFLOPs +Completed in 0.000592369 s (achieved 130.32417969205005 GB/s) +Achieved 691.4608968396388 GFLOPs +Completed in 0.0005824290000000001 s (achieved 132.54835181627288 GB/s) +Achieved 703.2616851152673 GFLOPs +Completed in 0.000588333 s (achieved 131.21821145507732 GB/s) +Achieved 696.2043604557283 GFLOPs +Completed in 0.000590667 s (achieved 130.6997072800749 GB/s) +Achieved 693.4533332656133 GFLOPs +Completed in 0.0005892590000000001 s (achieved 131.01200660490548 GB/s) +Achieved 695.1102995457005 GFLOPs +Completed in 0.000598573 s (achieved 128.9734151055928 GB/s) +Achieved 684.2941462444848 GFLOPs +Completed in 0.000590049 s (achieved 130.836598316411 GB/s) +Achieved 694.1796359285415 GFLOPs +Completed in 0.000593014 s (achieved 130.18243076891946 GB/s) +Achieved 690.7088196905976 GFLOPs +Durations: [0.0005836390000000001, 0.00059123, 0.000592369, 0.0005824290000000001, 0.000588333, 0.000590667, 0.0005892590000000001, 0.000598573, 0.000590049, 0.000593014] +Median duration 0.000590667 (130.6997072800749 GB/s) 28.662216508788354% of peak +Median achieved 693.4533332656133 GFLOPs +./spmm_benchmark 100000 100000 64 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 64 columns. +OK! +Completed in 0.000966302 s (achieved 106.38496453489695 GB/s) +Achieved 847.7680890653232 GFLOPs +Completed in 0.000945417 s (achieved 108.73509149930665 GB/s) +Achieved 866.4959483487181 GFLOPs +Completed in 0.0008135210000000001 s (achieved 126.36429053460205 GB/s) +Achieved 1006.9807663231803 GFLOPs +Completed in 0.0008162820000000001 s (achieved 125.93687475651795 GB/s) +Achieved 1003.5747450023398 GFLOPs +Completed in 0.00082291 s (achieved 124.92253587877167 GB/s) +Achieved 995.4916090459467 GFLOPs +Completed in 0.000817911 s (achieved 125.68605141635214 GB/s) +Achieved 1001.57596608922 GFLOPs +Completed in 0.000820621 s (achieved 125.27098867808647 GB/s) +Achieved 998.2683845526742 GFLOPs +Completed in 0.000827584 s (achieved 124.21700274534065 GB/s) +Achieved 989.8693063181502 GFLOPs +Completed in 0.000814927 s (achieved 126.14627322447286 GB/s) +Achieved 1005.2434144408027 GFLOPs +Completed in 0.000829815 s (achieved 123.88303899061839 GB/s) +Achieved 987.207992142827 GFLOPs +Durations: [0.000966302, 0.000945417, 0.0008135210000000001, 0.0008162820000000001, 0.00082291, 0.000817911, 0.000820621, 0.000827584, 0.000814927, 0.000829815] +Median duration 0.00082291 (124.92253587877167 GB/s) 27.39529295587098% of peak +Median achieved 995.4916090459467 GFLOPs +./spmm_benchmark 100000 100000 128 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 128 columns. +OK! +Completed in 0.001569599 s (achieved 98.11423427257535 GB/s) +Achieved 1043.8334886808668 GFLOPs +Completed in 0.0016042320000000001 s (achieved 95.99609283445287 GB/s) +Achieved 1021.298665030993 GFLOPs +Completed in 0.001493112 s (achieved 103.14028954291439 GB/s) +Achieved 1097.3054934927857 GFLOPs +Completed in 0.001553708 s (achieved 99.11772611069776 GB/s) +Achieved 1054.5095989722652 GFLOPs +Completed in 0.001495444 s (achieved 102.97945225631986 GB/s) +Achieved 1095.5943519115392 GFLOPs +Completed in 0.001586303 s (achieved 97.08107719647506 GB/s) +Achieved 1032.8417710866083 GFLOPs +Completed in 0.001616915 s (achieved 95.24310430665804 GB/s) +Achieved 1013.2876496290776 GFLOPs +Completed in 0.0015557580000000002 s (achieved 98.98712010479778 GB/s) +Achieved 1053.1200868001322 GFLOPs +Completed in 0.0015214660000000002 s (achieved 101.21816984408457 GB/s) +Achieved 1076.8561374358676 GFLOPs +Completed in 0.001557358 s (achieved 98.88542261959036 GB/s) +Achieved 1052.0381312453528 GFLOPs +Durations: [0.001569599, 0.0016042320000000001, 0.001493112, 0.001553708, 0.001495444, 0.001586303, 0.001616915, 0.0015557580000000002, 0.0015214660000000002, 0.001557358] +Median duration 0.001557358 (98.88542261959036 GB/s) 21.685399697278587% of peak +Median achieved 1052.0381312453528 GFLOPs +./spmm_benchmark 100000 100000 256 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 256 columns. +OK! +Completed in 0.003656147 s (achieved 70.12847240551324 GB/s) +Achieved 896.2440514563556 GFLOPs +Completed in 0.004092903 s (achieved 62.64502334895305 GB/s) +Achieved 800.6053405125897 GFLOPs +Completed in 0.004123835 s (achieved 62.17513649309441 GB/s) +Achieved 794.6001719273444 GFLOPs +Completed in 0.0037026040000000004 s (achieved 69.24856236313686 GB/s) +Achieved 884.9987738359273 GFLOPs +Completed in 0.003630134 s (achieved 70.6310026021078 GB/s) +Achieved 902.6664029482107 GFLOPs +Completed in 0.0036495000000000004 s (achieved 70.25620057542129 GB/s) +Achieved 897.8764214275927 GFLOPs +Completed in 0.0036375210000000003 s (achieved 70.48756666971819 GB/s) +Achieved 900.8332872854892 GFLOPs +Completed in 0.003723605 s (achieved 68.85800292995633 GB/s) +Achieved 880.0074121718067 GFLOPs +Completed in 0.003653256 s (achieved 70.18396849276371 GB/s) +Achieved 896.9532931718993 GFLOPs +Completed in 0.003625554 s (achieved 70.72022758452916 GB/s) +Achieved 903.8067009896971 GFLOPs +Durations: [0.003656147, 0.004092903, 0.004123835, 0.0037026040000000004, 0.003630134, 0.0036495000000000004, 0.0036375210000000003, 0.003723605, 0.003653256, 0.003625554] +Median duration 0.003656147 (70.12847240551324 GB/s) 15.379050966121323% of peak +Median achieved 896.2440514563556 GFLOPs +./spmm_benchmark 100000 100000 512 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 512 columns. +OK! +Completed in 0.009368300000000001 s (achieved 49.22985002615202 GB/s) +Achieved 699.5506121708314 GFLOPs +Completed in 0.009277689 s (achieved 49.710655746274746 GB/s) +Achieved 706.3828071839873 GFLOPs +Completed in 0.009460342 s (achieved 48.750880676406844 GB/s) +Achieved 692.7445117734644 GFLOPs +Completed in 0.009415544000000001 s (achieved 48.98283136906375 GB/s) +Achieved 696.0405049352432 GFLOPs +Completed in 0.009260743 s (achieved 49.80162002120133 GB/s) +Achieved 707.6753992633205 GFLOPs +Completed in 0.009513765 s (achieved 48.47712803501033 GB/s) +Achieved 688.8545176383902 GFLOPs +Completed in 0.009380281 s (achieved 49.16697101078316 GB/s) +Achieved 698.6571084597572 GFLOPs +Completed in 0.009076718000000001 s (achieved 50.81131792350495 GB/s) +Achieved 722.0230924878354 GFLOPs +Completed in 0.00923639 s (achieved 49.932928774120626 GB/s) +Achieved 709.5412818211444 GFLOPs +Completed in 0.009417663 s (achieved 48.9718100976856 GB/s) +Achieved 695.8838939129591 GFLOPs +Durations: [0.009368300000000001, 0.009277689, 0.009460342, 0.009415544000000001, 0.009260743, 0.009513765, 0.009380281, 0.009076718000000001, 0.00923639, 0.009417663] +Median duration 0.009380281 (49.16697101078316 GB/s) 10.782230484820868% of peak +Median achieved 698.6571084597572 GFLOPs +./spmm_benchmark 100000 100000 1 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 1 columns. +OK! +Completed in 0.00153337 s (achieved 67.56360434859167 GB/s) +Achieved 16.69525293960362 GFLOPs +Completed in 0.0014897270000000002 s (achieved 69.54294578805379 GB/s) +Achieved 17.18435659687983 GFLOPs +Completed in 0.001526065 s (achieved 67.88701922919404 GB/s) +Achieved 16.775170127091574 GFLOPs +Completed in 0.0014456430000000001 s (achieved 71.66361542925881 GB/s) +Achieved 17.70838305169395 GFLOPs +Completed in 0.001574548 s (achieved 65.79666291532554 GB/s) +Achieved 16.25863422391696 GFLOPs +Completed in 0.0014620430000000001 s (achieved 70.85975173096824 GB/s) +Achieved 17.50974492542285 GFLOPs +Completed in 0.0015609850000000002 s (achieved 66.36835331537459 GB/s) +Achieved 16.399901344343473 GFLOPs +Completed in 0.00144059 s (achieved 71.91498205596318 GB/s) +Achieved 17.770496810334656 GFLOPs +Completed in 0.00146123 s (achieved 70.89917672098164 GB/s) +Achieved 17.51948700752106 GFLOPs +Completed in 0.0016047370000000002 s (achieved 64.55886790171847 GB/s) +Achieved 15.952769830819628 GFLOPs +Durations: [0.00153337, 0.0014897270000000002, 0.001526065, 0.0014456430000000001, 0.001574548, 0.0014620430000000001, 0.0015609850000000002, 0.00144059, 0.00146123, 0.0016047370000000002] +Median duration 0.001526065 (67.88701922919404 GB/s) 14.887504216928516% of peak +Median achieved 16.775170127091574 GFLOPs +./spmm_benchmark 100000 100000 8 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 8 columns. +OK! +Completed in 0.0006573550000000001 s (achieved 166.12029116687327 GB/s) +Achieved 311.55159693012143 GFLOPs +Completed in 0.000652332 s (achieved 167.39942851186206 GB/s) +Achieved 313.95056504969864 GFLOPs +Completed in 0.000645617 s (achieved 169.1405337839617 GB/s) +Achieved 317.2159345246485 GFLOPs +Completed in 0.0006455650000000001 s (achieved 169.15415798564047 GB/s) +Achieved 317.24148614004787 GFLOPs +Completed in 0.0006630690000000001 s (achieved 164.68874883307768 GB/s) +Achieved 308.8667996844974 GFLOPs +Completed in 0.0006607980000000001 s (achieved 165.2547435070929 GB/s) +Achieved 309.9282988144637 GFLOPs +Completed in 0.000662455 s (achieved 164.84139149074275 GB/s) +Achieved 309.15307454845987 GFLOPs +Completed in 0.000651856 s (achieved 167.52166736211677 GB/s) +Achieved 314.1798188556982 GFLOPs +Completed in 0.0006434020000000001 s (achieved 169.72282336703958 GB/s) +Achieved 318.3079940690268 GFLOPs +Completed in 0.000646273 s (achieved 168.968847530378 GB/s) +Achieved 316.8939442000517 GFLOPs +Durations: [0.0006573550000000001, 0.000652332, 0.000645617, 0.0006455650000000001, 0.0006630690000000001, 0.0006607980000000001, 0.000662455, 0.000651856, 0.0006434020000000001, 0.000646273] +Median duration 0.000652332 (167.39942851186206 GB/s) 36.71040098944343% of peak +Median achieved 313.95056504969864 GFLOPs +./spmm_benchmark 100000 100000 32 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 32 columns. +OK! +Completed in 0.0008777850000000001 s (achieved 146.2772820223631 GB/s) +Achieved 933.2581440785614 GFLOPs +Completed in 0.0008762430000000001 s (achieved 146.5346987080068 GB/s) +Achieved 934.9004785202278 GFLOPs +Completed in 0.0008857740000000001 s (achieved 144.95797347856225 GB/s) +Achieved 924.8408736314228 GFLOPs +Completed in 0.000867859 s (achieved 147.95030529152777 GB/s) +Achieved 943.9321364415188 GFLOPs +Completed in 0.000862556 s (achieved 148.85990474821347 GB/s) +Achieved 949.7354374672485 GFLOPs +Completed in 0.0008737290000000001 s (achieved 146.95632627508073 GB/s) +Achieved 937.5904885839889 GFLOPs +Completed in 0.000874035 s (achieved 146.90487680699286 GB/s) +Achieved 937.2622377822399 GFLOPs +Completed in 0.0008611900000000001 s (achieved 149.09602294499473 GB/s) +Achieved 951.2418862271973 GFLOPs +Completed in 0.000873505 s (achieved 146.9940114824758 GB/s) +Achieved 937.8309225476672 GFLOPs +Completed in 0.000877942 s (achieved 146.2511236505373 GB/s) +Achieved 933.0912520417066 GFLOPs +Durations: [0.0008777850000000001, 0.0008762430000000001, 0.0008857740000000001, 0.000867859, 0.000862556, 0.0008737290000000001, 0.000874035, 0.0008611900000000001, 0.000873505, 0.000877942] +Median duration 0.000874035 (146.90487680699286 GB/s) 32.215981755919486% of peak +Median achieved 937.2622377822399 GFLOPs +./spmm_benchmark 100000 100000 64 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 64 columns. +OK! +Completed in 0.0013165470000000002 s (achieved 116.97265954044936 GB/s) +Achieved 1244.467535150663 GFLOPs +Completed in 0.0013167460000000001 s (achieved 116.9549814466875 GB/s) +Achieved 1244.2794586047726 GFLOPs +Completed in 0.001334698 s (achieved 115.38190961550852 GB/s) +Achieved 1227.5436091160698 GFLOPs +Completed in 0.001328137 s (achieved 115.95189652874664 GB/s) +Achieved 1233.6076775212196 GFLOPs +Completed in 0.001327613 s (achieved 115.99766196926363 GB/s) +Achieved 1234.0945742471638 GFLOPs +Completed in 0.001352783 s (achieved 113.83939922367445 GB/s) +Achieved 1211.1329015814067 GFLOPs +Completed in 0.001322782 s (achieved 116.42130298114125 GB/s) +Achieved 1238.6016743499686 GFLOPs +Completed in 0.001322969 s (achieved 116.40484697676212 GB/s) +Achieved 1238.4265995650692 GFLOPs +Completed in 0.0013503620000000001 s (achieved 114.04349648464633 GB/s) +Achieved 1213.3042843326455 GFLOPs +Completed in 0.001338282 s (achieved 115.07290989492498 GB/s) +Achieved 1224.2561732131194 GFLOPs +Durations: [0.0013165470000000002, 0.0013167460000000001, 0.001334698, 0.001328137, 0.001327613, 0.001352783, 0.001322782, 0.001322969, 0.0013503620000000001, 0.001338282] +Median duration 0.001328137 (115.95189652874664 GB/s) 25.42804748437426% of peak +Median achieved 1233.6076775212196 GFLOPs +./spmm_benchmark 100000 100000 128 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 128 columns. +OK! +Completed in 0.0025257540000000003 s (achieved 81.24306801058219 GB/s) +Achieved 1297.3551660217106 GFLOPs +Completed in 0.002491526 s (achieved 82.35916622985272 GB/s) +Achieved 1315.177927101704 GFLOPs +Completed in 0.0024973760000000004 s (achieved 82.16624328895608 GB/s) +Achieved 1312.097177197186 GFLOPs +Completed in 0.002499491 s (achieved 82.09671649147768 GB/s) +Achieved 1310.9869169362883 GFLOPs +Completed in 0.00249248 s (achieved 82.327643150597 GB/s) +Achieved 1314.6745410193864 GFLOPs +Completed in 0.002507001 s (achieved 81.85078665704562 GB/s) +Achieved 1307.0597099881493 GFLOPs +Completed in 0.0024947660000000003 s (achieved 82.25220481600279 GB/s) +Achieved 1313.4698805419025 GFLOPs +Completed in 0.00249373 s (achieved 82.28637583058311 GB/s) +Achieved 1314.0155510019129 GFLOPs +Completed in 0.002520357 s (achieved 81.41703893535718 GB/s) +Achieved 1300.1332747702013 GFLOPs +Completed in 0.0024863380000000003 s (achieved 82.53101710226044 GB/s) +Achieved 1317.9221811354691 GFLOPs +Durations: [0.0025257540000000003, 0.002491526, 0.0024973760000000004, 0.002499491, 0.00249248, 0.002507001, 0.0024947660000000003, 0.00249373, 0.002520357, 0.0024863380000000003] +Median duration 0.0024973760000000004 (82.16624328895608 GB/s) 18.01891300196405% of peak +Median achieved 1312.097177197186 GFLOPs +./spmm_benchmark 100000 100000 256 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 256 columns. +OK! +Completed in 0.006524261000000001 s (achieved 47.14710279064556 GB/s) +Achieved 1004.4969077723898 GFLOPs +Completed in 0.006437544000000001 s (achieved 47.782198304198 GB/s) +Achieved 1018.0279932843954 GFLOPs +Completed in 0.0064847540000000006 s (achieved 47.43433659935288 GB/s) +Achieved 1010.6165939371023 GFLOPs +Completed in 0.006528453 s (achieved 47.11682905582686 GB/s) +Achieved 1003.8519079481771 GFLOPs +Completed in 0.006314221 s (achieved 48.715432038251436 GB/s) +Achieved 1037.9110899032516 GFLOPs +Completed in 0.0064481650000000005 s (achieved 47.70349456008027 GB/s) +Achieved 1016.3511634705377 GFLOPs +Completed in 0.006347859 s (achieved 48.45728362901571 GB/s) +Achieved 1032.4110853753998 GFLOPs +Completed in 0.006502450000000001 s (achieved 47.305247099170316 GB/s) +Achieved 1007.8662657921244 GFLOPs +Completed in 0.006563397 s (achieved 46.865975652547 GB/s) +Achieved 998.5073278364847 GFLOPs +Completed in 0.0064496530000000005 s (achieved 47.69248888273524 GB/s) +Achieved 1016.1166810059393 GFLOPs +Durations: [0.006524261000000001, 0.006437544000000001, 0.0064847540000000006, 0.006528453, 0.006314221, 0.0064481650000000005, 0.006347859, 0.006502450000000001, 0.006563397, 0.0064496530000000005] +Median duration 0.0064847540000000006 (47.43433659935288 GB/s) 10.402266798103701% of peak +Median achieved 1010.6165939371023 GFLOPs +./spmm_benchmark 100000 100000 512 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 512 columns. +OK! +Completed in 0.018338973 s (achieved 27.94049612265638 GB/s) +Achieved 714.7183214676198 GFLOPs +Completed in 0.01832806 s (achieved 27.957132615235878 GB/s) +Achieved 715.1438832042235 GFLOPs +Completed in 0.01823388 s (achieved 28.1015342867234 GB/s) +Achieved 718.8376801865538 GFLOPs +Completed in 0.018615135 s (achieved 27.525989148077628 GB/s) +Achieved 704.1152266690518 GFLOPs +Completed in 0.018311917 s (achieved 27.981778423307624 GB/s) +Achieved 715.7743233545674 GFLOPs +Completed in 0.018358402000000003 s (achieved 27.910926234211452 GB/s) +Achieved 713.9619232654345 GFLOPs +Completed in 0.018725912 s (achieved 27.363153474180592 GB/s) +Achieved 699.9498876209607 GFLOPs +Completed in 0.018370709000000002 s (achieved 27.89222800274067 GB/s) +Achieved 713.4836222162138 GFLOPs +Completed in 0.018233986 s (achieved 28.101370923505154 GB/s) +Achieved 718.8335013529132 GFLOPs +Completed in 0.018681396000000003 s (achieved 27.428357281222453 GB/s) +Achieved 701.6178020100853 GFLOPs +Durations: [0.018338973, 0.01832806, 0.01823388, 0.018615135, 0.018311917, 0.018358402000000003, 0.018725912, 0.018370709000000002, 0.018233986, 0.018681396000000003] +Median duration 0.018358402000000003 (27.910926234211452 GB/s) 6.120817156625318% of peak +Median achieved 713.9619232654345 GFLOPs +Finished SPMM benchmark tests at Fri Sep 19 08:51:56 PM PDT 2025 diff --git a/scripts/spmm_experiments_sycl.out b/scripts/spmm_experiments_sycl.out new file mode 100644 index 0000000..145c519 --- /dev/null +++ b/scripts/spmm_experiments_sycl.out @@ -0,0 +1,5717 @@ +[level_zero:gpu][level_zero:0] Intel(R) oneAPI Unified Runtime over Level-Zero, Intel(R) Arc(TM) B580 Graphics 20.1.0 [1.13.0] +[opencl:cpu][opencl:0] Intel(R) OpenCL, Intel(R) Core(TM) Ultra 9 285K OpenCL 3.0 (Build 0) [2025.19.4.0.18_160000.xmain-hotfix] +[opencl:gpu][opencl:1] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) B580 Graphics OpenCL 3.0 NEO [25.37.0] +Starting SPMM benchmark tests at Fri Sep 19 08:52:00 PM PDT 2025 +./sycl_spmm 100000 100000 1 16 k 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 1 columns. +Using method k with WG size 16 +OK! +Completed in 0.00016042200000000002 s (achieved 87.26985076859782 GB/s) +Achieved 19.947388762139855 GFLOPs +Completed in 0.000161884 s (achieved 86.48170294778978 GB/s) +Achieved 19.767240740283167 GFLOPs +Completed in 0.000162008 s (achieved 86.41551034516814 GB/s) +Achieved 19.75211100686386 GFLOPs +Completed in 0.00016225700000000002 s (achieved 86.28289688580462 GB/s) +Achieved 19.721799367669806 GFLOPs +Completed in 0.000161946 s (achieved 86.44859397576971 GB/s) +Achieved 19.759672977412226 GFLOPs +Completed in 0.000161867 s (achieved 86.49078564500486 GB/s) +Achieved 19.769316784767742 GFLOPs +Completed in 0.000160526 s (achieved 87.21331123930081 GB/s) +Achieved 19.934465444850055 GFLOPs +Completed in 0.000161769 s (achieved 86.54318194462475 GB/s) +Achieved 19.78129307840192 GFLOPs +Completed in 0.000162253 s (achieved 86.28502400571946 GB/s) +Achieved 19.722285566368573 GFLOPs +Completed in 0.000162072 s (achieved 86.38138605064417 GB/s) +Achieved 19.744311170344044 GFLOPs +Durations: [0.00016042200000000002, 0.000161884, 0.000162008, 0.00016225700000000002, 0.000161946, 0.000161867, 0.000160526, 0.000161769, 0.000162253, 0.000162072] +Median duration 0.000161946 (86.44859397576971 GB/s) 18.95802499468634% of peak. +Median achieved 19.759672977412226 GFLOPs +./sycl_spmm 100000 100000 8 16 k 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 8 columns. +Using method k with WG size 16 +OK! +Completed in 0.0008043880000000001 s (achieved 24.366355539863847 GB/s) +Achieved 31.82543747544717 GFLOPs +Completed in 0.0008055720000000001 s (achieved 24.330542769609668 GB/s) +Achieved 31.778661621804133 GFLOPs +Completed in 0.0008066880000000001 s (achieved 24.296883057638144 GB/s) +Achieved 31.734697925344122 GFLOPs +Completed in 0.0008054020000000001 s (achieved 24.335678332062745 GB/s) +Achieved 31.78536929384332 GFLOPs +Completed in 0.000804019 s (achieved 24.377538341755606 GB/s) +Achieved 31.840043581059653 GFLOPs +Completed in 0.0008043600000000001 s (achieved 24.367203739619075 GB/s) +Achieved 31.826545327962602 GFLOPs +Completed in 0.0008056910000000001 s (achieved 24.326949165374813 GB/s) +Achieved 31.773967935598137 GFLOPs +Completed in 0.0008064540000000001 s (achieved 24.303933020358258 GB/s) +Achieved 31.743906038038126 GFLOPs +Completed in 0.0008055590000000001 s (achieved 24.330935412552027 GB/s) +Achieved 31.779174461460922 GFLOPs +Completed in 0.0008042990000000001 s (achieved 24.369051807847576 GB/s) +Achieved 31.828959130870484 GFLOPs +Durations: [0.0008043880000000001, 0.0008055720000000001, 0.0008066880000000001, 0.0008054020000000001, 0.000804019, 0.0008043600000000001, 0.0008056910000000001, 0.0008064540000000001, 0.0008055590000000001, 0.0008042990000000001] +Median duration 0.0008055590000000001 (24.330935412552027 GB/s) 5.335731450121058% of peak. +Median achieved 31.779174461460922 GFLOPs +./sycl_spmm 100000 100000 32 16 k 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 32 columns. +Using method k with WG size 16 +OK! +Completed in 0.003159451 s (achieved 12.280615841169872 GB/s) +Achieved 32.410694136418 GFLOPs +Completed in 0.0031610070000000004 s (achieved 12.274570730150232 GB/s) +Achieved 32.39474003062948 GFLOPs +Completed in 0.003161726 s (achieved 12.271779401504114 GB/s) +Achieved 32.38737322588991 GFLOPs +Completed in 0.0031609620000000002 s (achieved 12.274745473055354 GB/s) +Achieved 32.39520120773359 GFLOPs +Completed in 0.003161628 s (achieved 12.272159786034283 GB/s) +Achieved 32.38837712722686 GFLOPs +Completed in 0.003162427 s (achieved 12.26905917512088 GB/s) +Achieved 32.380194072463965 GFLOPs +Completed in 0.003162593 s (achieved 12.268415189687703 GB/s) +Achieved 32.3784944822176 GFLOPs +Completed in 0.0031618 s (achieved 12.271492187994179 GB/s) +Achieved 32.38661521917895 GFLOPs +Completed in 0.00316276 s (achieved 12.267767393036461 GB/s) +Achieved 32.376784833499855 GFLOPs +Completed in 0.0031602540000000004 s (achieved 12.277495416507659 GB/s) +Achieved 32.40245878970487 GFLOPs +Durations: [0.003159451, 0.0031610070000000004, 0.003161726, 0.0031609620000000002, 0.003161628, 0.003162427, 0.003162593, 0.0031618, 0.00316276, 0.0031602540000000004] +Median duration 0.003161726 (12.271779401504114 GB/s) 2.6911796933123053% of peak. +Median achieved 32.38737322588991 GFLOPs +./sycl_spmm 100000 100000 64 16 k 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 64 columns. +Using method k with WG size 16 +OK! +Completed in 0.006422938 s (achieved 10.02656479013187 GB/s) +Achieved 31.885719588138635 GFLOPs +Completed in 0.006420474000000001 s (achieved 10.030412707846805 GB/s) +Achieved 31.89795644371428 GFLOPs +Completed in 0.006425717 s (achieved 10.022228492166713 GB/s) +Achieved 31.871929622795403 GFLOPs +Completed in 0.006423947 s (achieved 10.02498993220212 GB/s) +Achieved 31.880711344598577 GFLOPs +Completed in 0.006423445000000001 s (achieved 10.025773397296934 GB/s) +Achieved 31.883202860770194 GFLOPs +Completed in 0.006424534 s (achieved 10.024073963963769 GB/s) +Achieved 31.877798451996675 GFLOPs +Completed in 0.006423257000000001 s (achieved 10.026066838054277 GB/s) +Achieved 31.88413603877285 GFLOPs +Completed in 0.006424812 s (achieved 10.023640224803467 GB/s) +Achieved 31.876419107671946 GFLOPs +Completed in 0.006420479000000001 s (achieved 10.030404896581704 GB/s) +Achieved 31.89793160292246 GFLOPs +Completed in 0.006423041 s (achieved 10.026404003960119 GB/s) +Achieved 31.885208268170796 GFLOPs +Durations: [0.006422938, 0.006420474000000001, 0.006425717, 0.006423947, 0.006423445000000001, 0.006424534, 0.006423257000000001, 0.006424812, 0.006420479000000001, 0.006423041] +Median duration 0.006423445000000001 (10.025773397296934 GB/s) 2.1986345169510817% of peak. +Median achieved 31.883202860770194 GFLOPs +./sycl_spmm 100000 100000 128 16 k 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 128 columns. +Using method k with WG size 16 +OK! +Completed in 0.012827357000000001 s (achieved 9.011989297561454 GB/s) +Achieved 31.93175336119514 GFLOPs +Completed in 0.012827045 s (achieved 9.012208501646326 GB/s) +Achieved 31.932530056610858 GFLOPs +Completed in 0.012830263000000001 s (achieved 9.009948120315226 GB/s) +Achieved 31.924520954870527 GFLOPs +Completed in 0.012830706 s (achieved 9.009637037899552 GB/s) +Achieved 31.92341871133202 GFLOPs +Completed in 0.012822215000000001 s (achieved 9.015603310348485 GB/s) +Achieved 31.94455872093862 GFLOPs +Completed in 0.012839605 s (achieved 9.00339254984869 GB/s) +Achieved 31.90129291360599 GFLOPs +Completed in 0.01282868 s (achieved 9.011059906397229 GB/s) +Achieved 31.92846029365453 GFLOPs +Completed in 0.012827001000000001 s (achieved 9.01223941590088 GB/s) +Achieved 31.93263959361974 GFLOPs +Completed in 0.012828409 s (achieved 9.011250264939322 GB/s) +Achieved 31.92913478202948 GFLOPs +Completed in 0.012821604 s (achieved 9.016032939404461 GB/s) +Achieved 31.94608100515349 GFLOPs +Durations: [0.012827357000000001, 0.012827045, 0.012830263000000001, 0.012830706, 0.012822215000000001, 0.012839605, 0.01282868, 0.012827001000000001, 0.012828409, 0.012821604] +Median duration 0.012828409 (9.011250264939322 GB/s) 1.9761513738902021% of peak. +Median achieved 31.92913478202948 GFLOPs +./sycl_spmm 100000 100000 256 16 k 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 256 columns. +Using method k with WG size 16 +OK! +Completed in 0.025839466000000002 s (achieved 8.436707012443678 GB/s) +Achieved 31.703441549449977 GFLOPs +Completed in 0.025834685000000003 s (achieved 8.43826832028337 GB/s) +Achieved 31.709308629077533 GFLOPs +Completed in 0.025843590000000003 s (achieved 8.435360721943043 GB/s) +Achieved 31.69838246156977 GFLOPs +Completed in 0.025825834000000002 s (achieved 8.441160273856015 GB/s) +Achieved 31.72017600670708 GFLOPs +Completed in 0.025847616 s (achieved 8.43404683820744 GB/s) +Achieved 31.69344515176951 GFLOPs +Completed in 0.025847013000000002 s (achieved 8.434243600991728 GB/s) +Achieved 31.69418454658571 GFLOPs +Completed in 0.025831357000000003 s (achieved 8.43935547017526 GB/s) +Achieved 31.713393918871546 GFLOPs +Completed in 0.025840143000000003 s (achieved 8.436485974555172 GB/s) +Achieved 31.70261093369336 GFLOPs +Completed in 0.025838756 s (achieved 8.436938837148352 GB/s) +Achieved 31.70431269988385 GFLOPs +Completed in 0.025846819 s (achieved 8.434306906393395 GB/s) +Achieved 31.694422435503572 GFLOPs +Durations: [0.025839466000000002, 0.025834685000000003, 0.025843590000000003, 0.025825834000000002, 0.025847616, 0.025847013000000002, 0.025831357000000003, 0.025840143000000003, 0.025838756, 0.025846819] +Median duration 0.025840143000000003 (8.436485974555172 GB/s) 1.850106573367362% of peak. +Median achieved 31.70261093369336 GFLOPs +./sycl_spmm 100000 100000 512 16 k 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 512 columns. +Using method k with WG size 16 +OK! +Completed in 0.051530706 s (achieved 8.204816832899592 GB/s) +Achieved 31.79463522195873 GFLOPs +Completed in 0.051552079 s (achieved 8.201415194137951 GB/s) +Achieved 31.78145346960692 GFLOPs +Completed in 0.051548201 s (achieved 8.202032191191309 GB/s) +Achieved 31.783844406131653 GFLOPs +Completed in 0.051548427 s (achieved 8.20199623162119 GB/s) +Achieved 31.78370505854621 GFLOPs +Completed in 0.051539406 s (achieved 8.203431836214799 GB/s) +Achieved 31.789268196067297 GFLOPs +Completed in 0.051525591 s (achieved 8.205631333758015 GB/s) +Achieved 31.7977915090775 GFLOPs +Completed in 0.051558205 s (achieved 8.200440725195921 GB/s) +Achieved 31.777677287252338 GFLOPs +Completed in 0.051544748 s (achieved 8.20258164808566 GB/s) +Achieved 31.785973616555463 GFLOPs +Completed in 0.051537296 s (achieved 8.203767694758374 GB/s) +Achieved 31.790569687629713 GFLOPs +Completed in 0.051554687 s (achieved 8.201000308662527 GB/s) +Achieved 31.77984573934083 GFLOPs +Durations: [0.051530706, 0.051552079, 0.051548201, 0.051548427, 0.051539406, 0.051525591, 0.051558205, 0.051544748, 0.051537296, 0.051554687] +Median duration 0.051548201 (8.202032191191309 GB/s) 1.798691269998094% of peak. +Median achieved 31.783844406131653 GFLOPs +./sycl_spmm 100000 100000 1 32 k 16 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 1 columns. +Using method k with WG size 16 +OK! +Completed in 0.000273313 s (achieved 98.0560895383681 GB/s) +Achieved 23.416376096270575 GFLOPs +Completed in 0.000274265 s (achieved 97.71572748983648 GB/s) +Achieved 23.33509561920041 GFLOPs +Completed in 0.000273547 s (achieved 97.97220952889266 GB/s) +Achieved 23.396345052221374 GFLOPs +Completed in 0.000274397 s (achieved 97.66872086793953 GB/s) +Achieved 23.32387015893031 GFLOPs +Completed in 0.000274379 s (achieved 97.67512819858663 GB/s) +Achieved 23.325400267513185 GFLOPs +Completed in 0.000273781 s (achieved 97.88847290352508 GB/s) +Achieved 23.37634824914804 GFLOPs +Completed in 0.000272079 s (achieved 98.50081777718972 GB/s) +Achieved 23.522579838943837 GFLOPs +Completed in 0.00027453000000000004 s (achieved 97.62140385385932 GB/s) +Achieved 23.312570575164823 GFLOPs +Completed in 0.00027383800000000004 s (achieved 97.86809719615246 GB/s) +Achieved 23.37148240930769 GFLOPs +Completed in 0.00027436800000000004 s (achieved 97.67904420340564 GB/s) +Achieved 23.32633543270352 GFLOPs +Durations: [0.000273313, 0.000274265, 0.000273547, 0.000274397, 0.000274379, 0.000273781, 0.000272079, 0.00027453000000000004, 0.00027383800000000004, 0.00027436800000000004] +Median duration 0.000274265 (97.71572748983648 GB/s) 21.42888760742028% of peak. +Median achieved 23.33509561920041 GFLOPs +./sycl_spmm 100000 100000 8 32 k 16 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 8 columns. +Using method k with WG size 16 +OK! +Completed in 0.001401546 s (achieved 23.117331860673858 GB/s) +Achieved 36.53108781302933 GFLOPs +Completed in 0.0014015990000000001 s (achieved 23.11645770295213 GB/s) +Achieved 36.52970642815812 GFLOPs +Completed in 0.0014005270000000002 s (achieved 23.134151644345305 GB/s) +Achieved 36.55766722098181 GFLOPs +Completed in 0.001400757 s (achieved 23.13035308765189 GB/s) +Achieved 36.551664564232055 GFLOPs +Completed in 0.001401852 s (achieved 23.112285747710885 GB/s) +Achieved 36.523113709578475 GFLOPs +Completed in 0.001400865 s (achieved 23.128569847915397 GB/s) +Achieved 36.548846605490176 GFLOPs +Completed in 0.0014008710000000001 s (achieved 23.128470787103165 GB/s) +Achieved 36.54869006496672 GFLOPs +Completed in 0.001402026 s (achieved 23.109417371717786 GB/s) +Achieved 36.51858096782799 GFLOPs +Completed in 0.001400767 s (achieved 23.13018796130977 GB/s) +Achieved 36.55140362387178 GFLOPs +Completed in 0.001399485 s (achieved 23.151376399175412 GB/s) +Achieved 36.58488658327885 GFLOPs +Durations: [0.001401546, 0.0014015990000000001, 0.0014005270000000002, 0.001400757, 0.001401852, 0.001400865, 0.0014008710000000001, 0.001402026, 0.001400767, 0.001399485] +Median duration 0.0014008710000000001 (23.128470787103165 GB/s) 5.072033067347185% of peak. +Median achieved 36.54869006496672 GFLOPs +./sycl_spmm 100000 100000 32 32 k 16 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 32 columns. +Using method k with WG size 16 +OK! +Completed in 0.005621340000000001 s (achieved 9.179306713345929 GB/s) +Achieved 36.43259436362148 GFLOPs +Completed in 0.005620278 s (achieved 9.18104122251604 GB/s) +Achieved 36.43947861653819 GFLOPs +Completed in 0.005620507 s (achieved 9.180667153336879 GB/s) +Achieved 36.43799393898095 GFLOPs +Completed in 0.005621672 s (achieved 9.178764609532537 GB/s) +Achieved 36.430442757955284 GFLOPs +Completed in 0.005622302 s (achieved 9.177736094574785 GB/s) +Achieved 36.42636059037739 GFLOPs +Completed in 0.005622458 s (achieved 9.177481450283844 GB/s) +Achieved 36.42534990923898 GFLOPs +Completed in 0.005622002 s (achieved 9.178225834853848 GB/s) +Achieved 36.42830436559788 GFLOPs +Completed in 0.005620827 s (achieved 9.180144487635005 GB/s) +Achieved 36.435919483022694 GFLOPs +Completed in 0.005619762 s (achieved 9.181884215025477 GB/s) +Achieved 36.44282444701395 GFLOPs +Completed in 0.005623347000000001 s (achieved 9.176030573962446 GB/s) +Achieved 36.419591392812855 GFLOPs +Durations: [0.005621340000000001, 0.005620278, 0.005620507, 0.005621672, 0.005622302, 0.005622458, 0.005622002, 0.005620827, 0.005619762, 0.005623347000000001] +Median duration 0.005621672 (9.178764609532537 GB/s) 2.0128869757746792% of peak. +Median achieved 36.430442757955284 GFLOPs +./sycl_spmm 100000 100000 64 32 k 16 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 64 columns. +Using method k with WG size 16 +OK! +Completed in 0.011327591000000001 s (achieved 6.815218169511946 GB/s) +Achieved 36.15949763722931 GFLOPs +Completed in 0.011332887000000002 s (achieved 6.812033332724485 GB/s) +Achieved 36.142599851211784 GFLOPs +Completed in 0.011325565000000001 s (achieved 6.816437325643356 GB/s) +Achieved 36.16596611294889 GFLOPs +Completed in 0.011327831 s (achieved 6.815073777142333 GB/s) +Achieved 36.158731534748355 GFLOPs +Completed in 0.011335194000000002 s (achieved 6.810646910851283 GB/s) +Achieved 36.13524391377862 GFLOPs +Completed in 0.011329760000000001 s (achieved 6.813913445651099 GB/s) +Achieved 36.152575164875515 GFLOPs +Completed in 0.011327818 s (achieved 6.815081598238955 GB/s) +Achieved 36.15877303113451 GFLOPs +Completed in 0.011334026 s (achieved 6.811348765213703 GB/s) +Achieved 36.138967741912715 GFLOPs +Completed in 0.011324490000000001 s (achieved 6.817084389672294 GB/s) +Achieved 36.16939924005408 GFLOPs +Completed in 0.011327699 s (achieved 6.815153192188458 GB/s) +Achieved 36.15915288709561 GFLOPs +Durations: [0.011327591000000001, 0.011332887000000002, 0.011325565000000001, 0.011327831, 0.011335194000000002, 0.011329760000000001, 0.011327818, 0.011334026, 0.011324490000000001, 0.011327699] +Median duration 0.011327831 (6.815073777142333 GB/s) 1.494533723057529% of peak. +Median achieved 36.158731534748355 GFLOPs +./sycl_spmm 100000 100000 128 32 k 16 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 128 columns. +Using method k with WG size 16 +OK! +Completed in 0.022278411 s (achieved 5.763427382680031 GB/s) +Achieved 36.77102464803257 GFLOPs +Completed in 0.022271390000000002 s (achieved 5.765244288748929 GB/s) +Achieved 36.78261662159389 GFLOPs +Completed in 0.022273952000000003 s (achieved 5.7645811573985615 GB/s) +Achieved 36.77838580239375 GFLOPs +Completed in 0.022282741000000002 s (achieved 5.76230742887511 GB/s) +Achieved 36.76387927320072 GFLOPs +Completed in 0.022273703000000002 s (achieved 5.764645600239888 GB/s) +Achieved 36.77879695172374 GFLOPs +Completed in 0.022271079000000003 s (achieved 5.7653247963423775 GB/s) +Achieved 36.78313026504014 GFLOPs +Completed in 0.022272767000000002 s (achieved 5.764887856097987 GB/s) +Achieved 36.78034255914409 GFLOPs +Completed in 0.022264243000000003 s (achieved 5.767094978257289 GB/s) +Achieved 36.794424135597154 GFLOPs +Completed in 0.022280086 s (achieved 5.762994092572175 GB/s) +Achieved 36.7682602302343 GFLOPs +Completed in 0.022279401 s (achieved 5.763171280951405 GB/s) +Achieved 36.769390703098345 GFLOPs +Durations: [0.022278411, 0.022271390000000002, 0.022273952000000003, 0.022282741000000002, 0.022273703000000002, 0.022271079000000003, 0.022272767000000002, 0.022264243000000003, 0.022280086, 0.022279401] +Median duration 0.022273952000000003 (5.7645811573985615 GB/s) 1.2641625345172283% of peak. +Median achieved 36.77838580239375 GFLOPs +./sycl_spmm 100000 100000 256 32 k 16 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 256 columns. +Using method k with WG size 16 +OK! +Completed in 0.04487407 s (achieved 5.143282167184746 GB/s) +Achieved 36.51106307049929 GFLOPs +Completed in 0.044870716000000005 s (achieved 5.143666617666631 GB/s) +Achieved 36.51379220246898 GFLOPs +Completed in 0.044942808 s (achieved 5.135415748833495 GB/s) +Achieved 36.45522104448837 GFLOPs +Completed in 0.044877727000000006 s (achieved 5.1428630509740385 GB/s) +Achieved 36.5080878539147 GFLOPs +Completed in 0.044891552 s (achieved 5.141279232226143 GB/s) +Achieved 36.496844662443394 GFLOPs +Completed in 0.044881079000000004 s (achieved 5.142478949759652 GB/s) +Achieved 36.50536120132049 GFLOPs +Completed in 0.044861477000000004 s (achieved 5.144725930445848 GB/s) +Achieved 36.521312037942934 GFLOPs +Completed in 0.044910967 s (achieved 5.139056658477204 GB/s) +Achieved 36.48106708546267 GFLOPs +Completed in 0.044880017 s (achieved 5.142600636715445 GB/s) +Achieved 36.506225031064496 GFLOPs +Completed in 0.044864209 s (achieved 5.1444126430491615 GB/s) +Achieved 36.519088077536374 GFLOPs +Durations: [0.04487407, 0.044870716000000005, 0.044942808, 0.044877727000000006, 0.044891552, 0.044881079000000004, 0.044861477000000004, 0.044910967, 0.044880017, 0.044864209] +Median duration 0.044880017 (5.142600636715445 GB/s) 1.1277632975253167% of peak. +Median achieved 36.506225031064496 GFLOPs +./sycl_spmm 100000 100000 512 32 k 16 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 512 columns. +Using method k with WG size 16 +OK! +Completed in 0.09052855200000001 s (achieved 4.811741648093521 GB/s) +Achieved 36.19631516916342 GFLOPs +Completed in 0.09048671400000001 s (achieved 4.813966434895624 GB/s) +Achieved 36.21305112262116 GFLOPs +Completed in 0.09049068 s (achieved 4.813755449732503 GB/s) +Achieved 36.21146398723051 GFLOPs +Completed in 0.090556628 s (achieved 4.810249825114955 GB/s) +Achieved 36.18509293433497 GFLOPs +Completed in 0.090480272 s (achieved 4.814309178911399 GB/s) +Achieved 36.21562941366931 GFLOPs +Completed in 0.09054714200000001 s (achieved 4.8107537618360166 GB/s) +Achieved 36.18888379712747 GFLOPs +Completed in 0.09051766600000001 s (achieved 4.812320326509523 GB/s) +Achieved 36.20066827617937 GFLOPs +Completed in 0.090479157 s (achieved 4.814368506992168 GB/s) +Achieved 36.21607570901661 GFLOPs +Completed in 0.09046858 s (achieved 4.814931371753596 GB/s) +Achieved 36.22030985785341 GFLOPs +Completed in 0.090486124 s (achieved 4.813997823577901 GB/s) +Achieved 36.213287243909356 GFLOPs +Durations: [0.09052855200000001, 0.09048671400000001, 0.09049068, 0.090556628, 0.090480272, 0.09054714200000001, 0.09051766600000001, 0.090479157, 0.09046858, 0.090486124] +Median duration 0.09049068 (4.813755449732503 GB/s) 1.0556481249413383% of peak. +Median achieved 36.21146398723051 GFLOPs +./sycl_spmm 100000 100000 1 64 k 16 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 1 columns. +Using method k with WG size 16 +OK! +Completed in 0.000486913 s (achieved 107.61676932018656 GB/s) +Achieved 26.28806378141475 GFLOPs +Completed in 0.00048514900000000004 s (achieved 108.00806350214057 GB/s) +Achieved 26.38364708574067 GFLOPs +Completed in 0.00048538000000000005 s (achieved 107.95666076064114 GB/s) +Achieved 26.37109069182908 GFLOPs +Completed in 0.000483856 s (achieved 108.29669157765947 GB/s) +Achieved 26.454151648424325 GFLOPs +Completed in 0.00048691600000000005 s (achieved 107.61610626884307 GB/s) +Achieved 26.28790181468672 GFLOPs +Completed in 0.00048526700000000006 s (achieved 107.98179971026259 GB/s) +Achieved 26.377231503481585 GFLOPs +Completed in 0.000483908 s (achieved 108.28505418385312 GB/s) +Achieved 26.45130892649016 GFLOPs +Completed in 0.00048527300000000004 s (achieved 107.98046460445975 GB/s) +Achieved 26.376905370791285 GFLOPs +Completed in 0.000485753 s (achieved 107.8737630030077 GB/s) +Achieved 26.350840859449146 GFLOPs +Completed in 0.00048448 s (achieved 108.15720772787319 GB/s) +Achieved 26.42007926023778 GFLOPs +Durations: [0.000486913, 0.00048514900000000004, 0.00048538000000000005, 0.000483856, 0.00048691600000000005, 0.00048526700000000006, 0.000483908, 0.00048527300000000004, 0.000485753, 0.00048448] +Median duration 0.00048527300000000004 (107.98046460445975 GB/s) 23.679926448346436% of peak. +Median achieved 26.376905370791285 GFLOPs +./sycl_spmm 100000 100000 8 64 k 16 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 8 columns. +Using method k with WG size 16 +OK! +Completed in 0.002556253 s (achieved 22.68946148914055 GB/s) +Achieved 40.0586326940252 GFLOPs +Completed in 0.002559656 s (achieved 22.659296405454484 GB/s) +Achieved 40.00537572236269 GFLOPs +Completed in 0.002557054 s (achieved 22.68235399017776 GB/s) +Achieved 40.04608428292872 GFLOPs +Completed in 0.002558148 s (achieved 22.672653810491028 GB/s) +Achieved 40.028958449628405 GFLOPs +Completed in 0.002555664 s (achieved 22.694690694864427 GB/s) +Achieved 40.06786494625272 GFLOPs +Completed in 0.0025564760000000002 s (achieved 22.68748229985339 GB/s) +Achieved 40.05513840145575 GFLOPs +Completed in 0.002557532 s (achieved 22.678114682436036 GB/s) +Achieved 40.038599712535365 GFLOPs +Completed in 0.002555878 s (achieved 22.692790500955052 GB/s) +Achieved 40.06451012137512 GFLOPs +Completed in 0.002556413 s (achieved 22.688041408019753 GB/s) +Achieved 40.056125516495186 GFLOPs +Completed in 0.00255754 s (achieved 22.678043745161364 GB/s) +Achieved 40.038474471562516 GFLOPs +Durations: [0.002556253, 0.002559656, 0.002557054, 0.002558148, 0.002555664, 0.0025564760000000002, 0.002557532, 0.002555878, 0.002556413, 0.00255754] +Median duration 0.002557054 (22.68235399017776 GB/s) 4.974200436442491% of peak. +Median achieved 40.04608428292872 GFLOPs +./sycl_spmm 100000 100000 32 64 k 16 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 32 columns. +Using method k with WG size 16 +OK! +Completed in 0.010297031 s (achieved 7.4973071363968895 GB/s) +Achieved 39.77845652790596 GFLOPs +Completed in 0.010299062000000001 s (achieved 7.4958286492498045 GB/s) +Achieved 39.77061211982217 GFLOPs +Completed in 0.010286716000000001 s (achieved 7.504825057870752 GB/s) +Achieved 39.818344357907804 GFLOPs +Completed in 0.010282177 s (achieved 7.508138013963386 GB/s) +Achieved 39.835921906421184 GFLOPs +Completed in 0.010289507 s (achieved 7.502789395060424 GB/s) +Achieved 39.80754374334942 GFLOPs +Completed in 0.010286825000000001 s (achieved 7.504745536159115 GB/s) +Achieved 39.81792243962544 GFLOPs +Completed in 0.010295646 s (achieved 7.498315695780527 GB/s) +Achieved 39.78380764062789 GFLOPs +Completed in 0.010284769000000001 s (achieved 7.506245789283162 GB/s) +Achieved 39.82588233143593 GFLOPs +Completed in 0.010286633 s (achieved 7.504885612230941 GB/s) +Achieved 39.81866564112864 GFLOPs +Completed in 0.010289608 s (achieved 7.502715749715636 GB/s) +Achieved 39.80715300330197 GFLOPs +Durations: [0.010297031, 0.010299062000000001, 0.010286716000000001, 0.010282177, 0.010289507, 0.010286825000000001, 0.010295646, 0.010284769000000001, 0.010286633, 0.010289608] +Median duration 0.010289507 (7.502789395060424 GB/s) 1.6453485515483386% of peak. +Median achieved 39.80754374334942 GFLOPs +./sycl_spmm 100000 100000 64 64 k 16 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 64 columns. +Using method k with WG size 16 +OK! +Completed in 0.020478606 s (achieved 5.019873130036292 GB/s) +Achieved 40.002722841584045 GFLOPs +Completed in 0.020482417000000003 s (achieved 5.018939122272532 GB/s) +Achieved 39.9952798539352 GFLOPs +Completed in 0.020486166 s (achieved 5.018020648666032 GB/s) +Achieved 39.98796065598609 GFLOPs +Completed in 0.020474919 s (achieved 5.020777078532032 GB/s) +Achieved 40.00992629079509 GFLOPs +Completed in 0.020475059 s (achieved 5.020742748531274 GB/s) +Achieved 40.00965271943783 GFLOPs +Completed in 0.020484686000000002 s (achieved 5.0183831961104985 GB/s) +Achieved 39.990849749905856 GFLOPs +Completed in 0.020472896 s (achieved 5.02127319945356 GB/s) +Achieved 40.013879814560674 GFLOPs +Completed in 0.020490664000000002 s (achieved 5.016919119848922 GB/s) +Achieved 39.979182714625544 GFLOPs +Completed in 0.020474735 s (achieved 5.020822198675587 GB/s) +Achieved 40.01028584741146 GFLOPs +Completed in 0.020488586 s (achieved 5.017427947443518 GB/s) +Achieved 39.98323749623327 GFLOPs +Durations: [0.020478606, 0.020482417000000003, 0.020486166, 0.020474919, 0.020475059, 0.020484686000000002, 0.020472896, 0.020490664000000002, 0.020474735, 0.020488586] +Median duration 0.020482417000000003 (5.018939122272532 GB/s) 1.1006445443580113% of peak. +Median achieved 39.9952798539352 GFLOPs +./sycl_spmm 100000 100000 128 64 k 16 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 128 columns. +Using method k with WG size 16 +OK! +Completed in 0.042532146 s (achieved 3.6207908248974787 GB/s) +Achieved 38.5214515157547 GFLOPs +Completed in 0.042539352 s (achieved 3.6201774770805155 GB/s) +Achieved 38.514926132396184 GFLOPs +Completed in 0.042526688 s (achieved 3.621255527822905 GB/s) +Achieved 38.52639547194458 GFLOPs +Completed in 0.042549858 s (achieved 3.619283617820769 GB/s) +Achieved 38.505416398804435 GFLOPs +Completed in 0.042525598000000005 s (achieved 3.621348346471224 GB/s) +Achieved 38.527382965902085 GFLOPs +Completed in 0.042514862 s (achieved 3.6222628218809696 GB/s) +Achieved 38.537112033904755 GFLOPs +Completed in 0.042565005 s (achieved 3.6179956750856714 GB/s) +Achieved 38.49171402658123 GFLOPs +Completed in 0.042561091 s (achieved 3.618328392944626 GB/s) +Achieved 38.49525379882767 GFLOPs +Completed in 0.042536330000000004 s (achieved 3.6204346731370567 GB/s) +Achieved 38.51766243115003 GFLOPs +Completed in 0.042518245 s (achieved 3.621974613486516 GB/s) +Achieved 38.53404579610471 GFLOPs +Durations: [0.042532146, 0.042539352, 0.042526688, 0.042549858, 0.042525598000000005, 0.042514862, 0.042565005, 0.042561091, 0.042536330000000004, 0.042518245] +Median duration 0.042536330000000004 (3.6204346731370567 GB/s) 0.7939549721791791% of peak. +Median achieved 38.51766243115003 GFLOPs +./sycl_spmm 100000 100000 256 64 k 16 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 256 columns. +Using method k with WG size 16 +OK! +Completed in 0.08541072300000001 s (achieved 3.001965034296689 GB/s) +Achieved 38.365206204846196 GFLOPs +Completed in 0.085369661 s (achieved 3.003408951102664 GB/s) +Achieved 38.383659506390686 GFLOPs +Completed in 0.085398811 s (achieved 3.0023837685515318 GB/s) +Achieved 38.370557641604634 GFLOPs +Completed in 0.085372536 s (achieved 3.003307808497103 GB/s) +Achieved 38.382366900756004 GFLOPs +Completed in 0.08537201300000001 s (achieved 3.0033262071494082 GB/s) +Achieved 38.38260203610286 GFLOPs +Completed in 0.085436387 s (achieved 3.0010632823225545 GB/s) +Achieved 38.35368178666076 GFLOPs +Completed in 0.08540502100000001 s (achieved 3.0021654581643387 GB/s) +Achieved 38.36776762808828 GFLOPs +Completed in 0.085392855 s (achieved 3.002593179487909 GB/s) +Achieved 38.37323391986367 GFLOPs +Completed in 0.085387528 s (achieved 3.0027804997469887 GB/s) +Achieved 38.37562787858199 GFLOPs +Completed in 0.08537504 s (achieved 3.003219723235269 GB/s) +Achieved 38.381241168378956 GFLOPs +Durations: [0.08541072300000001, 0.085369661, 0.085398811, 0.085372536, 0.08537201300000001, 0.085436387, 0.08540502100000001, 0.085392855, 0.085387528, 0.08537504] +Median duration 0.085392855 (3.002593179487909 GB/s) 0.658463416554366% of peak. +Median achieved 38.37323391986367 GFLOPs +./sycl_spmm 100000 100000 512 64 k 16 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 512 columns. +Using method k with WG size 16 +OK! +Completed in 0.16826592 s (achieved 2.7408996664327514 GB/s) +Achieved 38.94787488755893 GFLOPs +Completed in 0.168438565 s (achieved 2.7380903179743905 GB/s) +Achieved 38.907954363064064 GFLOPs +Completed in 0.168244307 s (achieved 2.741251767882999 GB/s) +Achieved 38.95287820942435 GFLOPs +Completed in 0.16833207900000002 s (achieved 2.739822419706466 GB/s) +Achieved 38.93256733317005 GFLOPs +Completed in 0.16826375200000002 s (achieved 2.7409349816471464 GB/s) +Achieved 38.94837671276937 GFLOPs +Completed in 0.168228924 s (achieved 2.7415024303430724 GB/s) +Achieved 38.95644009468907 GFLOPs +Completed in 0.16823294900000002 s (achieved 2.741436839462405 GB/s) +Achieved 38.9555080556782 GFLOPs +Completed in 0.168255396 s (achieved 2.7410711035977715 GB/s) +Achieved 38.95031099032331 GFLOPs +Completed in 0.16832169700000002 s (achieved 2.7399914106141647 GB/s) +Achieved 38.934968674894 GFLOPs +Completed in 0.16832540000000001 s (achieved 2.7399311333880685 GB/s) +Achieved 38.934112142314824 GFLOPs +Durations: [0.16826592, 0.168438565, 0.168244307, 0.16833207900000002, 0.16826375200000002, 0.168228924, 0.16823294900000002, 0.168255396, 0.16832169700000002, 0.16832540000000001] +Median duration 0.16826592 (2.7408996664327514 GB/s) 0.6010744882527963% of peak. +Median achieved 38.94787488755893 GFLOPs +./sycl_spmm 100000 100000 1 128 k 16 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 1 columns. +Using method k with WG size 16 +OK! +Completed in 0.000908396 s (achieved 114.04718206597124 GB/s) +Achieved 28.181541970682392 GFLOPs +Completed in 0.0009068570000000001 s (achieved 114.24072814126153 GB/s) +Achieved 28.229368026050413 GFLOPs +Completed in 0.000905044 s (achieved 114.46957716972877 GB/s) +Achieved 28.28591759074697 GFLOPs +Completed in 0.000909726 s (achieved 113.88044751936297 GB/s) +Achieved 28.140341157667255 GFLOPs +Completed in 0.000907179 s (achieved 114.20017879602592 GB/s) +Achieved 28.21934811101227 GFLOPs +Completed in 0.0009067830000000001 s (achieved 114.25005100448509 GB/s) +Achieved 28.23167174505918 GFLOPs +Completed in 0.000907503 s (achieved 114.15940663557035 GB/s) +Achieved 28.20927313738908 GFLOPs +Completed in 0.0009067410000000001 s (achieved 114.25534303621431 GB/s) +Achieved 28.232979428524793 GFLOPs +Completed in 0.0009091930000000001 s (achieved 113.94720812852717 GB/s) +Achieved 28.156837987094047 GFLOPs +Completed in 0.000907433 s (achieved 114.16821296999339 GB/s) +Achieved 28.21144921994241 GFLOPs +Durations: [0.000908396, 0.0009068570000000001, 0.000905044, 0.000909726, 0.000907179, 0.0009067830000000001, 0.000907503, 0.0009067410000000001, 0.0009091930000000001, 0.000907433] +Median duration 0.000907433 (114.16821296999339 GB/s) 25.036888809209074% of peak. +Median achieved 28.21144921994241 GFLOPs +./sycl_spmm 100000 100000 8 128 k 16 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 8 columns. +Using method k with WG size 16 +OK! +Completed in 0.004847797 s (achieved 22.525696517407805 GB/s) +Achieved 42.24599338627422 GFLOPs +Completed in 0.004848701 s (achieved 22.521496788521297 GB/s) +Achieved 42.23811697194774 GFLOPs +Completed in 0.004850236 s (achieved 22.51436919770502 GB/s) +Achieved 42.22474947610797 GFLOPs +Completed in 0.004847021 s (achieved 22.52930284395302 GB/s) +Achieved 42.252756899547165 GFLOPs +Completed in 0.004845455 s (achieved 22.536584077243518 GB/s) +Achieved 42.26641254536467 GFLOPs +Completed in 0.0048471140000000005 s (achieved 22.528870581546048 GB/s) +Achieved 42.251946209641446 GFLOPs +Completed in 0.004848316 s (achieved 22.523285198407034 GB/s) +Achieved 42.24147105922964 GFLOPs +Completed in 0.004849029 s (achieved 22.51997338023757 GB/s) +Achieved 42.235259883989144 GFLOPs +Completed in 0.004846451 s (achieved 22.531952556623395 GB/s) +Achieved 42.257726323860496 GFLOPs +Completed in 0.004846015 s (achieved 22.533979775134828 GB/s) +Achieved 42.26152828664377 GFLOPs +Durations: [0.004847797, 0.004848701, 0.004850236, 0.004847021, 0.004845455, 0.0048471140000000005, 0.004848316, 0.004849029, 0.004846451, 0.004846015] +Median duration 0.004847797 (22.525696517407805 GB/s) 4.939845727501711% of peak. +Median achieved 42.24599338627422 GFLOPs +./sycl_spmm 100000 100000 32 128 k 16 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 32 columns. +Using method k with WG size 16 +OK! +Completed in 0.019692882000000002 s (achieved 6.520122549863448 GB/s) +Achieved 41.59878681038153 GFLOPs +Completed in 0.019697889 s (achieved 6.518465202032564 GB/s) +Achieved 41.588212828288356 GFLOPs +Completed in 0.019689758 s (achieved 6.521157040122078 GB/s) +Achieved 41.60538692248021 GFLOPs +Completed in 0.019696415000000002 s (achieved 6.518953017592287 GB/s) +Achieved 41.59132512185593 GFLOPs +Completed in 0.019693527000000002 s (achieved 6.519909003603062 GB/s) +Achieved 41.59742437197765 GFLOPs +Completed in 0.019695840000000003 s (achieved 6.51914333178986 GB/s) +Achieved 41.592539338256195 GFLOPs +Completed in 0.019691347 s (achieved 6.520630813118067 GB/s) +Achieved 41.602029561512474 GFLOPs +Completed in 0.019705949 s (achieved 6.515799061491533 GB/s) +Achieved 41.57120268605181 GFLOPs +Completed in 0.019704351000000002 s (achieved 6.516327485234099 GB/s) +Achieved 41.574574062347956 GFLOPs +Completed in 0.019699565000000002 s (achieved 6.5179106239147915 GB/s) +Achieved 41.58467458545404 GFLOPs +Durations: [0.019692882000000002, 0.019697889, 0.019689758, 0.019696415000000002, 0.019693527000000002, 0.019695840000000003, 0.019691347, 0.019705949, 0.019704351000000002, 0.019699565000000002] +Median duration 0.019696415000000002 (6.518953017592287 GB/s) 1.4295949599983087% of peak. +Median achieved 41.59132512185593 GFLOPs +./sycl_spmm 100000 100000 64 128 k 16 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 64 columns. +Using method k with WG size 16 +OK! +Completed in 0.039110197 s (achieved 3.9375921323024787 GB/s) +Achieved 41.8918881947846 GFLOPs +Completed in 0.039146531000000005 s (achieved 3.9339374413533594 GB/s) +Achieved 41.853006081177405 GFLOPs +Completed in 0.039107463 s (achieved 3.9378674090927346 GB/s) +Achieved 41.894816853754996 GFLOPs +Completed in 0.039114812000000006 s (achieved 3.9371275515781585 GB/s) +Achieved 41.88694553868749 GFLOPs +Completed in 0.039108326000000006 s (achieved 3.937780512517973 GB/s) +Achieved 41.8938923645057 GFLOPs +Completed in 0.039100623 s (achieved 3.9385562731315047 GB/s) +Achieved 41.90214565123426 GFLOPs +Completed in 0.039100305 s (achieved 3.938588305129589 GB/s) +Achieved 41.90248643840502 GFLOPs +Completed in 0.039114548 s (achieved 3.937154124854006 GB/s) +Achieved 41.88722825072656 GFLOPs +Completed in 0.039111546000000004 s (achieved 3.9374563204430726 GB/s) +Achieved 41.890443297741285 GFLOPs +Completed in 0.039132381 s (achieved 3.93535992609292 GB/s) +Achieved 41.86813984050702 GFLOPs +Durations: [0.039110197, 0.039146531000000005, 0.039107463, 0.039114812000000006, 0.039108326000000006, 0.039100623, 0.039100305, 0.039114548, 0.039111546000000004, 0.039132381] +Median duration 0.039111546000000004 (3.9374563204430726 GB/s) 0.8634772632550597% of peak. +Median achieved 41.890443297741285 GFLOPs +./sycl_spmm 100000 100000 128 128 k 16 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 128 columns. +Using method k with WG size 16 +OK! +Completed in 0.08230353700000001 s (achieved 2.493210030572562 GB/s) +Achieved 39.8135987764414 GFLOPs +Completed in 0.082262475 s (achieved 2.4944545371385924 GB/s) +Achieved 39.83347206609089 GFLOPs +Completed in 0.08225549500000001 s (achieved 2.4946662104458794 GB/s) +Achieved 39.83685223704507 GFLOPs +Completed in 0.082247189 s (achieved 2.4949181424303757 GB/s) +Achieved 39.840875291190805 GFLOPs +Completed in 0.082282439 s (achieved 2.493849313338901 GB/s) +Achieved 39.82380736186005 GFLOPs +Completed in 0.08223529900000001 s (achieved 2.4952788704519695 GB/s) +Achieved 39.84663568864752 GFLOPs +Completed in 0.08227149 s (achieved 2.4941812042057343 GB/s) +Achieved 39.829107264254 GFLOPs +Completed in 0.08221381400000001 s (achieved 2.4959309636212716 GB/s) +Achieved 39.85704884096485 GFLOPs +Completed in 0.082284176 s (achieved 2.493796668754391 GB/s) +Achieved 39.82296669046063 GFLOPs +Completed in 0.08226960900000001 s (achieved 2.4942382307906676 GB/s) +Achieved 39.830017910988246 GFLOPs +Durations: [0.08230353700000001, 0.082262475, 0.08225549500000001, 0.082247189, 0.082282439, 0.08223529900000001, 0.08227149, 0.08221381400000001, 0.082284176, 0.08226960900000001] +Median duration 0.08226960900000001 (2.4942382307906676 GB/s) 0.5469820681558482% of peak. +Median achieved 39.830017910988246 GFLOPs +./sycl_spmm 100000 100000 256 128 k 16 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 256 columns. +Using method k with WG size 16 +OK! +Completed in 0.164444863 s (achieved 1.870535803845694 GB/s) +Achieved 39.85287153664387 GFLOPs +Completed in 0.16454717300000002 s (achieved 1.8693727664345834 GB/s) +Achieved 39.828092336779314 GFLOPs +Completed in 0.164449918 s (achieved 1.8704783057416912 GB/s) +Achieved 39.85164650553368 GFLOPs +Completed in 0.164526372 s (achieved 1.8696091104470476 GB/s) +Achieved 39.83312778573881 GFLOPs +Completed in 0.164557426 s (achieved 1.8692562923292202 GB/s) +Achieved 39.82561078708171 GFLOPs +Completed in 0.16441882200000002 s (achieved 1.8708320632536828 GB/s) +Achieved 39.85918351853901 GFLOPs +Completed in 0.164520533 s (achieved 1.8696754647640246 GB/s) +Achieved 39.83454150370398 GFLOPs +Completed in 0.164493244 s (achieved 1.8699856390454555 GB/s) +Achieved 39.841149950207075 GFLOPs +Completed in 0.164493513 s (achieved 1.8699825810152162 GB/s) +Achieved 39.841084797064305 GFLOPs +Completed in 0.16449002000000001 s (achieved 1.8700222907140507 GB/s) +Achieved 39.84193083568231 GFLOPs +Durations: [0.164444863, 0.16454717300000002, 0.164449918, 0.164526372, 0.164557426, 0.16441882200000002, 0.164520533, 0.164493244, 0.164493513, 0.16449002000000001] +Median duration 0.164493513 (1.8699825810152162 GB/s) 0.41008389934544215% of peak. +Median achieved 39.841084797064305 GFLOPs +./sycl_spmm 100000 100000 512 128 k 16 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 512 columns. +Using method k with WG size 16 +OK! +Completed in 0.3229982 s (achieved 1.5863865619065372 GB/s) +Achieved 40.57979270472715 GFLOPs +Completed in 0.32295484300000005 s (achieved 1.5865995358366556 GB/s) +Achieved 40.58524058114217 GFLOPs +Completed in 0.322950242 s (achieved 1.586622139766039 GB/s) +Achieved 40.58581879000419 GFLOPs +Completed in 0.32296463400000003 s (achieved 1.5865514364647122 GB/s) +Achieved 40.584010198466494 GFLOPs +Completed in 0.322911885 s (achieved 1.5868106062432483 GB/s) +Achieved 40.59063976539606 GFLOPs +Completed in 0.32293348 s (achieved 1.586704494064846 GB/s) +Achieved 40.58792541423701 GFLOPs +Completed in 0.32297458700000004 s (achieved 1.5865025442388754 GB/s) +Achieved 40.58275953457601 GFLOPs +Completed in 0.322830008 s (achieved 1.5872130573437897 GB/s) +Achieved 40.600934470750936 GFLOPs +Completed in 0.322982506 s (achieved 1.5864636458050148 GB/s) +Achieved 40.58176451203831 GFLOPs +Completed in 0.32301412300000004 s (achieved 1.5863083608886042 GB/s) +Achieved 40.5777923214831 GFLOPs +Durations: [0.3229982, 0.32295484300000005, 0.322950242, 0.32296463400000003, 0.322911885, 0.32293348, 0.32297458700000004, 0.322830008, 0.322982506, 0.32301412300000004] +Median duration 0.32296463400000003 (1.5865514364647122 GB/s) 0.34792794659313864% of peak. +Median achieved 40.584010198466494 GFLOPs +./sycl_spmm 100000 100000 1 16 k 32 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 1 columns. +Using method k with WG size 32 +OK! +Completed in 0.000122099 s (achieved 114.66108649538491 GB/s) +Achieved 26.208240853733447 GFLOPs +Completed in 0.00012174500000000001 s (achieved 114.99448848001973 GB/s) +Achieved 26.284446999876792 GFLOPs +Completed in 0.00012193100000000001 s (achieved 114.81906980177314 GB/s) +Achieved 26.24435131344777 GFLOPs +Completed in 0.000122075 s (achieved 114.68362891664961 GB/s) +Achieved 26.21339340569322 GFLOPs +Completed in 0.00012208 s (achieved 114.67893184796856 GB/s) +Achieved 26.212319790301443 GFLOPs +Completed in 0.000122047 s (achieved 114.70993961342762 GB/s) +Achieved 26.219407277524233 GFLOPs +Completed in 0.00012157300000000001 s (achieved 115.15718128202809 GB/s) +Achieved 26.3216339154253 GFLOPs +Completed in 0.00012054800000000001 s (achieved 116.13634402893453 GB/s) +Achieved 26.545442479344327 GFLOPs +Completed in 0.000121922 s (achieved 114.827545479897 GB/s) +Achieved 26.246288610751137 GFLOPs +Completed in 0.00012159200000000001 s (achieved 115.13918678860452 GB/s) +Achieved 26.317520889532204 GFLOPs +Durations: [0.000122099, 0.00012174500000000001, 0.00012193100000000001, 0.000122075, 0.00012208, 0.000122047, 0.00012157300000000001, 0.00012054800000000001, 0.000121922, 0.00012159200000000001] +Median duration 0.00012193100000000001 (114.81906980177314 GB/s) 25.179620570564282% of peak. +Median achieved 26.24435131344777 GFLOPs +./sycl_spmm 100000 100000 8 16 k 32 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 8 columns. +Using method k with WG size 32 +OK! +Completed in 0.000585659 s (achieved 33.46658038209949 GB/s) +Achieved 43.711443006937486 GFLOPs +Completed in 0.000587346 s (achieved 33.37045625576747 GB/s) +Achieved 43.58589315326912 GFLOPs +Completed in 0.000585356 s (achieved 33.483903812380845 GB/s) +Achieved 43.73406952350365 GFLOPs +Completed in 0.000586531 s (achieved 33.41682536813911 GB/s) +Achieved 43.646456879517025 GFLOPs +Completed in 0.0005852660000000001 s (achieved 33.48905284093045 GB/s) +Achieved 43.74079478391022 GFLOPs +Completed in 0.000584986 s (achieved 33.505082172906704 GB/s) +Achieved 43.76173104997385 GFLOPs +Completed in 0.000587013 s (achieved 33.3893866064295 GB/s) +Achieved 43.61061850418985 GFLOPs +Completed in 0.000585441 s (achieved 33.47904229461209 GB/s) +Achieved 43.72771978730564 GFLOPs +Completed in 0.000585331 s (achieved 33.4853339392583 GB/s) +Achieved 43.73593744394198 GFLOPs +Completed in 0.000593802 s (achieved 33.00764227806575 GB/s) +Achieved 43.112013768899395 GFLOPs +Durations: [0.000585659, 0.000587346, 0.000585356, 0.000586531, 0.0005852660000000001, 0.000584986, 0.000587013, 0.000585441, 0.000585331, 0.000593802] +Median duration 0.000585659 (33.46658038209949 GB/s) 7.339162364495501% of peak. +Median achieved 43.711443006937486 GFLOPs +./sycl_spmm 100000 100000 32 16 k 32 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 32 columns. +Using method k with WG size 32 +OK! +Completed in 0.0022535470000000003 s (achieved 17.217304098827313 GB/s) +Achieved 45.43947829798979 GFLOPs +Completed in 0.002253763 s (achieved 17.215653997336897 GB/s) +Achieved 45.435123391412496 GFLOPs +Completed in 0.002255295 s (achieved 17.203959570699176 GB/s) +Achieved 45.40425975315868 GFLOPs +Completed in 0.0022536360000000003 s (achieved 17.216624157583563 GB/s) +Achieved 45.437683814067576 GFLOPs +Completed in 0.002254882 s (achieved 17.207110615987887 GB/s) +Achieved 45.4125759130633 GFLOPs +Completed in 0.0022547360000000002 s (achieved 17.208224820998996 GB/s) +Achieved 45.41551649505751 GFLOPs +Completed in 0.002254074 s (achieved 17.213278712233937 GB/s) +Achieved 45.42885459838497 GFLOPs +Completed in 0.00225523 s (achieved 17.20445542139826 GB/s) +Achieved 45.40556838992032 GFLOPs +Completed in 0.002256304 s (achieved 17.196266105985718 GB/s) +Achieved 45.38395535353392 GFLOPs +Completed in 0.0022548010000000003 s (achieved 17.207728753003035 GB/s) +Achieved 45.41420728481138 GFLOPs +Durations: [0.0022535470000000003, 0.002253763, 0.002255295, 0.0022536360000000003, 0.002254882, 0.0022547360000000002, 0.002254074, 0.00225523, 0.002256304, 0.0022548010000000003] +Median duration 0.0022548010000000003 (17.207728753003035 GB/s) 3.773624726535753% of peak. +Median achieved 45.41420728481138 GFLOPs +./sycl_spmm 100000 100000 64 16 k 32 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 64 columns. +Using method k with WG size 32 +OK! +Completed in 0.004898638 s (achieved 13.146512152969867 GB/s) +Achieved 41.807539156802356 GFLOPs +Completed in 0.004898286 s (achieved 13.147456885939288 GB/s) +Achieved 41.810543524816644 GFLOPs +Completed in 0.004896131000000001 s (achieved 13.153243652998665 GB/s) +Achieved 41.82894616177549 GFLOPs +Completed in 0.004894321 s (achieved 13.158107937750714 GB/s) +Achieved 41.844415190585174 GFLOPs +Completed in 0.004898393 s (achieved 13.14716969422421 GB/s) +Achieved 41.80963021954343 GFLOPs +Completed in 0.004893329 s (achieved 13.160775414855614 GB/s) +Achieved 41.85289809861548 GFLOPs +Completed in 0.004896331 s (achieved 13.152706383616632 GB/s) +Achieved 41.82723757850521 GFLOPs +Completed in 0.004896506 s (achieved 13.152236308910886 GB/s) +Achieved 41.825742682639415 GFLOPs +Completed in 0.00489687 s (achieved 13.151258661144773 GB/s) +Achieved 41.82263364148936 GFLOPs +Completed in 0.0048929640000000005 s (achieved 13.161757168047835 GB/s) +Achieved 41.856020195529744 GFLOPs +Durations: [0.004898638, 0.004898286, 0.004896131000000001, 0.004894321, 0.004898393, 0.004893329, 0.004896331, 0.004896506, 0.00489687, 0.0048929640000000005] +Median duration 0.004896506 (13.152236308910886 GB/s) 2.8842623484453696% of peak. +Median achieved 41.825742682639415 GFLOPs +./sycl_spmm 100000 100000 128 16 k 32 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 128 columns. +Using method k with WG size 32 +OK! +Completed in 0.009267685000000001 s (achieved 12.473449842112673 GB/s) +Achieved 44.19658199431681 GFLOPs +Completed in 0.009264870000000001 s (achieved 12.477239723816954 GB/s) +Achieved 44.210010502036184 GFLOPs +Completed in 0.009264847000000001 s (achieved 12.477270698587898 GB/s) +Achieved 44.21012025346991 GFLOPs +Completed in 0.009265918 s (achieved 12.47582851477857 GB/s) +Achieved 44.20501023212164 GFLOPs +Completed in 0.009266421 s (achieved 12.475151301672998 GB/s) +Achieved 44.20261069511088 GFLOPs +Completed in 0.009263713 s (achieved 12.478798080208229 GB/s) +Achieved 44.21553215217268 GFLOPs +Completed in 0.009265405 s (achieved 12.476519267101654 GB/s) +Achieved 44.207457741998326 GFLOPs +Completed in 0.009274216 s (achieved 12.464665908147923 GB/s) +Achieved 44.16545829857748 GFLOPs +Completed in 0.009263751 s (achieved 12.47874689205269 GB/s) +Achieved 44.215350779614 GFLOPs +Completed in 0.00926307 s (achieved 12.479664301360133 GB/s) +Achieved 44.21860139241094 GFLOPs +Durations: [0.009267685000000001, 0.009264870000000001, 0.009264847000000001, 0.009265918, 0.009266421, 0.009263713, 0.009265405, 0.009274216, 0.009263751, 0.00926307] +Median duration 0.009265405 (12.476519267101654 GB/s) 2.7360787866450993% of peak. +Median achieved 44.207457741998326 GFLOPs +./sycl_spmm 100000 100000 256 16 k 32 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 256 columns. +Using method k with WG size 32 +OK! +Completed in 0.018677733000000002 s (achieved 11.671652228886664 GB/s) +Achieved 43.859712525069284 GFLOPs +Completed in 0.018671217 s (achieved 11.675725476277204 GB/s) +Achieved 43.87501896635875 GFLOPs +Completed in 0.018677834 s (achieved 11.671589114669294 GB/s) +Achieved 43.85947535458341 GFLOPs +Completed in 0.018681726000000003 s (achieved 11.669157550003677 GB/s) +Achieved 43.8503380255122 GFLOPs +Completed in 0.018682697 s (achieved 11.668551066262008 GB/s) +Achieved 43.84805898206238 GFLOPs +Completed in 0.018675171 s (achieved 11.673253433663339 GB/s) +Achieved 43.86572952933068 GFLOPs +Completed in 0.018669282000000002 s (achieved 11.67693562076999 GB/s) +Achieved 43.87956644502986 GFLOPs +Completed in 0.018681228 s (achieved 11.669468623797108 GB/s) +Achieved 43.85150697802093 GFLOPs +Completed in 0.018675357 s (achieved 11.67313717215687 GB/s) +Achieved 43.86529264206302 GFLOPs +Completed in 0.018666463 s (achieved 11.67869906580588 GB/s) +Achieved 43.886193115428455 GFLOPs +Durations: [0.018677733000000002, 0.018671217, 0.018677834, 0.018681726000000003, 0.018682697, 0.018675171, 0.018669282000000002, 0.018681228, 0.018675357, 0.018666463] +Median duration 0.018677733000000002 (11.671652228886664 GB/s) 2.559572857211988% of peak. +Median achieved 43.859712525069284 GFLOPs +./sycl_spmm 100000 100000 512 16 k 32 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 512 columns. +Using method k with WG size 32 +OK! +Completed in 0.036930637 s (achieved 11.44848933962336 GB/s) +Achieved 44.36424966078977 GFLOPs +Completed in 0.036933111000000005 s (achieved 11.447722451542193 GB/s) +Achieved 44.36127787881178 GFLOPs +Completed in 0.036892434 s (achieved 11.460344524842139 GB/s) +Achieved 44.41018990506292 GFLOPs +Completed in 0.036912575 s (achieved 11.454091295446064 GB/s) +Achieved 44.38595790188032 GFLOPs +Completed in 0.036919689000000006 s (achieved 11.451884223618459 GB/s) +Achieved 44.37740523762266 GFLOPs +Completed in 0.036919347000000005 s (achieved 11.451990307412531 GB/s) +Achieved 44.377816324866195 GFLOPs +Completed in 0.036904532000000004 s (achieved 11.45658760826448 GB/s) +Achieved 44.39563140917218 GFLOPs +Completed in 0.036937884000000004 s (achieved 11.44624321198258 GB/s) +Achieved 44.35554565063878 GFLOPs +Completed in 0.036918304 s (achieved 11.452313844102916 GB/s) +Achieved 44.379070067790764 GFLOPs +Completed in 0.036912963 s (achieved 11.453970899057873 GB/s) +Achieved 44.385491351642514 GFLOPs +Durations: [0.036930637, 0.036933111000000005, 0.036892434, 0.036912575, 0.036919689000000006, 0.036919347000000005, 0.036904532000000004, 0.036937884000000004, 0.036918304, 0.036912963] +Median duration 0.036919347000000005 (11.451990307412531 GB/s) 2.511401383204502% of peak. +Median achieved 44.377816324866195 GFLOPs +./sycl_spmm 100000 100000 1 32 k 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 1 columns. +Using method k with WG size 32 +OK! +Completed in 0.000194178 s (achieved 138.01771570414775 GB/s) +Achieved 32.95944957719206 GFLOPs +Completed in 0.00019492900000000002 s (achieved 137.48597694545194 GB/s) +Achieved 32.83246720600834 GFLOPs +Completed in 0.00019647500000000002 s (achieved 136.40414302074055 GB/s) +Achieved 32.57411884463672 GFLOPs +Completed in 0.000196436 s (achieved 136.43122441914926 GB/s) +Achieved 32.580586043291454 GFLOPs +Completed in 0.00019393900000000001 s (achieved 138.18780131897142 GB/s) +Achieved 33.00006703138616 GFLOPs +Completed in 0.00019651100000000002 s (achieved 136.37915434759378 GB/s) +Achieved 32.56815140119382 GFLOPs +Completed in 0.000194849 s (achieved 137.5424251599957 GB/s) +Achieved 32.84594737463369 GFLOPs +Completed in 0.000194305 s (achieved 137.92750572553462 GB/s) +Achieved 32.93790689894753 GFLOPs +Completed in 0.000194123 s (achieved 138.05681964527645 GB/s) +Achieved 32.968787830396195 GFLOPs +Completed in 0.00019735800000000002 s (achieved 135.79385684897497 GB/s) +Achieved 32.428378885071794 GFLOPs +Durations: [0.000194178, 0.00019492900000000002, 0.00019647500000000002, 0.000196436, 0.00019393900000000001, 0.00019651100000000002, 0.000194849, 0.000194305, 0.000194123, 0.00019735800000000002] +Median duration 0.00019492900000000002 (137.48597694545194 GB/s) 30.150433540669283% of peak. +Median achieved 32.83246720600834 GFLOPs +./sycl_spmm 100000 100000 8 32 k 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 8 columns. +Using method k with WG size 32 +OK! +Completed in 0.000905525 s (achieved 35.78035283399134 GB/s) +Achieved 56.54178515225974 GFLOPs +Completed in 0.000904879 s (achieved 35.80589670000077 GB/s) +Achieved 56.58215076269866 GFLOPs +Completed in 0.0009050940000000001 s (achieved 35.797391210194746 GB/s) +Achieved 56.56870999034354 GFLOPs +Completed in 0.000905726 s (achieved 35.77241240728432 GB/s) +Achieved 56.529237319012594 GFLOPs +Completed in 0.0009044910000000001 s (achieved 35.821256375132535 GB/s) +Achieved 56.60642283892266 GFLOPs +Completed in 0.0009051020000000001 s (achieved 35.797074804828625 GB/s) +Achieved 56.56820999180203 GFLOPs +Completed in 0.000906282 s (achieved 35.75046619043521 GB/s) +Achieved 56.494556881853555 GFLOPs +Completed in 0.0009058100000000001 s (achieved 35.76909506408629 GB/s) +Achieved 56.5239950983098 GFLOPs +Completed in 0.000904343 s (achieved 35.82711869279687 GB/s) +Achieved 56.615686747174465 GFLOPs +Completed in 0.0009041510000000001 s (achieved 35.834726721532135 GB/s) +Achieved 56.62770930961753 GFLOPs +Durations: [0.000905525, 0.000904879, 0.0009050940000000001, 0.000905726, 0.0009044910000000001, 0.0009051020000000001, 0.000906282, 0.0009058100000000001, 0.000904343, 0.0009041510000000001] +Median duration 0.0009051020000000001 (35.797074804828625 GB/s) 7.850235702813294% of peak. +Median achieved 56.56820999180203 GFLOPs +./sycl_spmm 100000 100000 32 32 k 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 32 columns. +Using method k with WG size 32 +OK! +Completed in 0.0038782380000000004 s (achieved 13.305012224623656 GB/s) +Achieved 52.80748628629805 GFLOPs +Completed in 0.0038790670000000004 s (achieved 13.302168794712749 GB/s) +Achieved 52.7962007358986 GFLOPs +Completed in 0.00388046 s (achieved 13.297393607974314 GB/s) +Achieved 52.777248058219904 GFLOPs +Completed in 0.0038776880000000002 s (achieved 13.306899368902295 GB/s) +Achieved 52.81497634673032 GFLOPs +Completed in 0.00387929 s (achieved 13.3014041229194 GB/s) +Achieved 52.793165759713766 GFLOPs +Completed in 0.003881208 s (achieved 13.294830887703004 GB/s) +Achieved 52.76707664211761 GFLOPs +Completed in 0.0038812280000000005 s (achieved 13.294762379329429 GB/s) +Achieved 52.766804732935036 GFLOPs +Completed in 0.0038826370000000004 s (achieved 13.28993774076742 GB/s) +Achieved 52.74765578136714 GFLOPs +Completed in 0.0038813320000000004 s (achieved 13.294406147168035 GB/s) +Achieved 52.765390850357555 GFLOPs +Completed in 0.0038807460000000005 s (achieved 13.296413627689109 GB/s) +Achieved 52.77335852436619 GFLOPs +Durations: [0.0038782380000000004, 0.0038790670000000004, 0.00388046, 0.0038776880000000002, 0.00387929, 0.003881208, 0.0038812280000000005, 0.0038826370000000004, 0.0038813320000000004, 0.0038807460000000005] +Median duration 0.0038807460000000005 (13.296413627689109 GB/s) 2.9158801815107696% of peak. +Median achieved 52.77335852436619 GFLOPs +./sycl_spmm 100000 100000 64 32 k 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 64 columns. +Using method k with WG size 32 +OK! +Completed in 0.008051586000000001 s (achieved 9.588173559842742 GB/s) +Achieved 50.87196485263897 GFLOPs +Completed in 0.008060126 s (achieved 9.578014537241724 GB/s) +Achieved 50.81806413448127 GFLOPs +Completed in 0.008063028 s (achieved 9.574567271749522 GB/s) +Achieved 50.79977398069311 GFLOPs +Completed in 0.008057473 s (achieved 9.581168190076466 GB/s) +Achieved 50.83479646782558 GFLOPs +Completed in 0.008055994 s (achieved 9.582927196817673 GB/s) +Achieved 50.84412922849744 GFLOPs +Completed in 0.008057473 s (achieved 9.581168190076466 GB/s) +Achieved 50.83479646782558 GFLOPs +Completed in 0.008063203 s (achieved 9.574359469803749 GB/s) +Achieved 50.79867144607422 GFLOPs +Completed in 0.008080752 s (achieved 9.55356679675357 GB/s) +Achieved 50.68835177716134 GFLOPs +Completed in 0.00806135 s (achieved 9.576560253555547 GB/s) +Achieved 50.810348142680816 GFLOPs +Completed in 0.008061965 s (achieved 9.575829713971718 GB/s) +Achieved 50.80647211939024 GFLOPs +Durations: [0.008051586000000001, 0.008060126, 0.008063028, 0.008057473, 0.008055994, 0.008057473, 0.008063203, 0.008080752, 0.00806135, 0.008061965] +Median duration 0.00806135 (9.576560253555547 GB/s) 2.1001228626218302% of peak. +Median achieved 50.810348142680816 GFLOPs +./sycl_spmm 100000 100000 128 32 k 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 128 columns. +Using method k with WG size 32 +OK! +Completed in 0.014185974 s (achieved 9.05119408790683 GB/s) +Achieved 57.74718041919434 GFLOPs +Completed in 0.014182611000000001 s (achieved 9.053340319353044 GB/s) +Achieved 57.760873509116195 GFLOPs +Completed in 0.014197241000000001 s (achieved 9.044011015943168 GB/s) +Achieved 57.70135197395043 GFLOPs +Completed in 0.014197442000000001 s (achieved 9.043882975538834 GB/s) +Achieved 57.70053506821863 GFLOPs +Completed in 0.014186837 s (achieved 9.050643494388495 GB/s) +Achieved 57.743667598351905 GFLOPs +Completed in 0.014193418000000001 s (achieved 9.046447022133782 GB/s) +Achieved 57.71689384473845 GFLOPs +Completed in 0.014196866 s (achieved 9.044249906986515 GB/s) +Achieved 57.702876113643676 GFLOPs +Completed in 0.014194343000000002 s (achieved 9.04585749407352 GB/s) +Achieved 57.713132619100435 GFLOPs +Completed in 0.014194041000000001 s (achieved 9.046049958570643 GB/s) +Achieved 57.71436055454539 GFLOPs +Completed in 0.014195480000000002 s (achieved 9.04513295781474 GB/s) +Achieved 57.70851003277099 GFLOPs +Durations: [0.014185974, 0.014182611000000001, 0.014197241000000001, 0.014197442000000001, 0.014186837, 0.014193418000000001, 0.014196866, 0.014194343000000002, 0.014194041000000001, 0.014195480000000002] +Median duration 0.014194343000000002 (9.04585749407352 GB/s) 1.9837406785248946% of peak. +Median achieved 57.713132619100435 GFLOPs +./sycl_spmm 100000 100000 256 32 k 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 256 columns. +Using method k with WG size 32 +OK! +Completed in 0.028724665000000003 s (achieved 8.03490672563109 GB/s) +Achieved 57.03808904298796 GFLOPs +Completed in 0.028716158000000002 s (achieved 8.037287021474112 GB/s) +Achieved 57.05498625547331 GFLOPs +Completed in 0.028707877000000003 s (achieved 8.039605436514863 GB/s) +Achieved 57.071444189342174 GFLOPs +Completed in 0.028712661 s (achieved 8.038265906458479 GB/s) +Achieved 57.06193515118644 GFLOPs +Completed in 0.028724350000000003 s (achieved 8.034994838873638 GB/s) +Achieved 57.03871454010273 GFLOPs +Completed in 0.028716879 s (achieved 8.037085227820196 GB/s) +Achieved 57.053553765365656 GFLOPs +Completed in 0.028731876000000003 s (achieved 8.032890160043847 GB/s) +Achieved 57.02377387400669 GFLOPs +Completed in 0.028713142 s (achieved 8.038131250143227 GB/s) +Achieved 57.060979254725936 GFLOPs +Completed in 0.028716495 s (achieved 8.03719270057157 GB/s) +Achieved 57.054316691504305 GFLOPs +Completed in 0.028733311 s (achieved 8.03248898116893 GB/s) +Achieved 57.02092599074294 GFLOPs +Durations: [0.028724665000000003, 0.028716158000000002, 0.028707877000000003, 0.028712661, 0.028724350000000003, 0.028716879, 0.028731876000000003, 0.028713142, 0.028716495, 0.028733311] +Median duration 0.028716879 (8.037085227820196 GB/s) 1.7625186903114463% of peak. +Median achieved 57.053553765365656 GFLOPs +./sycl_spmm 100000 100000 512 32 k 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 512 columns. +Using method k with WG size 32 +OK! +Completed in 0.058730973000000006 s (achieved 7.41687020918247 GB/s) +Achieved 55.79338860944803 GFLOPs +Completed in 0.058732428 s (achieved 7.416686468333984 GB/s) +Achieved 55.7920064193498 GFLOPs +Completed in 0.058737613 s (achieved 7.41603176826406 GB/s) +Achieved 55.787081439622 GFLOPs +Completed in 0.058723705 s (achieved 7.417788165784159 GB/s) +Achieved 55.800293935813485 GFLOPs +Completed in 0.058753239000000006 s (achieved 7.414059401899528 GB/s) +Achieved 55.772244318308985 GFLOPs +Completed in 0.058733727000000006 s (achieved 7.416522435227037 GB/s) +Achieved 55.79077248069069 GFLOPs +Completed in 0.058727671 s (achieved 7.417287227344671 GB/s) +Achieved 55.79652562758704 GFLOPs +Completed in 0.058729611 s (achieved 7.417042214020454 GB/s) +Achieved 55.79468251543502 GFLOPs +Completed in 0.058738542000000005 s (achieved 7.415914477414165 GB/s) +Achieved 55.78619911948104 GFLOPs +Completed in 0.058716159000000004 s (achieved 7.418741474557285 GB/s) +Achieved 55.807465198804984 GFLOPs +Durations: [0.058730973000000006, 0.058732428, 0.058737613, 0.058723705, 0.058753239000000006, 0.058733727000000006, 0.058727671, 0.058729611, 0.058738542000000005, 0.058716159000000004] +Median duration 0.058732428 (7.416686468333984 GB/s) 1.6264663307749965% of peak. +Median achieved 55.7920064193498 GFLOPs +./sycl_spmm 100000 100000 1 64 k 32 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 1 columns. +Using method k with WG size 32 +OK! +Completed in 0.00031329000000000003 s (achieved 167.2571866321938 GB/s) +Achieved 40.85671422643557 GFLOPs +Completed in 0.000312484 s (achieved 167.68859845624095 GB/s) +Achieved 40.96209725937968 GFLOPs +Completed in 0.00031217800000000003 s (achieved 167.8529684987411 GB/s) +Achieved 41.002248717078075 GFLOPs +Completed in 0.000313402 s (achieved 167.19741418370018 GB/s) +Achieved 40.84211332410131 GFLOPs +Completed in 0.000312349 s (achieved 167.7610749514165 GB/s) +Achieved 40.97980144005584 GFLOPs +Completed in 0.00031270500000000004 s (achieved 167.57008682304405 GB/s) +Achieved 40.933147855007114 GFLOPs +Completed in 0.00031302200000000004 s (achieved 167.4003871932324 GB/s) +Achieved 40.89169451348467 GFLOPs +Completed in 0.000312589 s (achieved 167.6322711291824 GB/s) +Achieved 40.94833791336227 GFLOPs +Completed in 0.000312713 s (achieved 167.56579995075356 GB/s) +Achieved 40.9321006801764 GFLOPs +Completed in 0.000312971 s (achieved 167.42766582207298 GB/s) +Achieved 40.89835799483019 GFLOPs +Durations: [0.00031329000000000003, 0.000312484, 0.00031217800000000003, 0.000313402, 0.000312349, 0.00031270500000000004, 0.00031302200000000004, 0.000312589, 0.000312713, 0.000312971] +Median duration 0.000312713 (167.56579995075356 GB/s) 36.74688595411262% of peak. +Median achieved 40.9321006801764 GFLOPs +./sycl_spmm 100000 100000 8 64 k 32 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 8 columns. +Using method k with WG size 32 +OK! +Completed in 0.0015792850000000002 s (achieved 36.72548273427532 GB/s) +Achieved 64.83946849365377 GFLOPs +Completed in 0.001580145 s (achieved 36.705494748899625 GB/s) +Achieved 64.80417936328628 GFLOPs +Completed in 0.001578589 s (achieved 36.74167500216966 GB/s) +Achieved 64.8680562198267 GFLOPs +Completed in 0.001577913 s (achieved 36.75741564965876 GB/s) +Achieved 64.8958466024426 GFLOPs +Completed in 0.0015786020000000001 s (achieved 36.74137242952941 GB/s) +Achieved 64.86752202265042 GFLOPs +Completed in 0.0015790460000000002 s (achieved 36.73104140094715 GB/s) +Achieved 64.84928241482515 GFLOPs +Completed in 0.0015785950000000001 s (achieved 36.74153535263953 GB/s) +Achieved 64.8678096661905 GFLOPs +Completed in 0.001580198 s (achieved 36.70426364291057 GB/s) +Achieved 64.80200582458654 GFLOPs +Completed in 0.0015792 s (achieved 36.727459473150965 GB/s) +Achieved 64.84295845997974 GFLOPs +Completed in 0.0015796760000000001 s (achieved 36.716392475419006 GB/s) +Achieved 64.82341948602118 GFLOPs +Durations: [0.0015792850000000002, 0.001580145, 0.001578589, 0.001577913, 0.0015786020000000001, 0.0015790460000000002, 0.0015785950000000001, 0.001580198, 0.0015792, 0.0015796760000000001] +Median duration 0.0015792 (36.727459473150965 GB/s) 8.05426742832258% of peak. +Median achieved 64.84295845997974 GFLOPs +./sycl_spmm 100000 100000 32 64 k 32 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 32 columns. +Using method k with WG size 32 +OK! +Completed in 0.006415474 s (achieved 12.033406105301028 GB/s) +Achieved 63.84563322990632 GFLOPs +Completed in 0.006418395 s (achieved 12.027929723863988 GB/s) +Achieved 63.816577197258816 GFLOPs +Completed in 0.006418651 s (achieved 12.02745000468167 GB/s) +Achieved 63.814031951573625 GFLOPs +Completed in 0.006417209 s (achieved 12.030152672291022 GB/s) +Achieved 63.828371492965246 GFLOPs +Completed in 0.006417209 s (achieved 12.030152672291022 GB/s) +Achieved 63.828371492965246 GFLOPs +Completed in 0.006415888000000001 s (achieved 12.032629621963475 GB/s) +Achieved 63.84151344287805 GFLOPs +Completed in 0.006425594000000001 s (achieved 12.014454072261646 GB/s) +Achieved 63.74507944323902 GFLOPs +Completed in 0.006413455 s (achieved 12.037194304785798 GB/s) +Achieved 63.86573227690847 GFLOPs +Completed in 0.00641707 s (achieved 12.030413257140719 GB/s) +Achieved 63.82975407779563 GFLOPs +Completed in 0.0064165820000000005 s (achieved 12.031328205577362 GB/s) +Achieved 63.83460851899033 GFLOPs +Durations: [0.006415474, 0.006418395, 0.006418651, 0.006417209, 0.006417209, 0.006415888000000001, 0.006425594000000001, 0.006413455, 0.00641707, 0.0064165820000000005] +Median duration 0.006417209 (12.030152672291022 GB/s) 2.638191375502417% of peak. +Median achieved 63.828371492965246 GFLOPs +./sycl_spmm 100000 100000 64 64 k 32 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 64 columns. +Using method k with WG size 32 +OK! +Completed in 0.012651126 s (achieved 8.125759240718969 GB/s) +Achieved 64.75312948428464 GFLOPs +Completed in 0.012649256000000001 s (achieved 8.126960510562833 GB/s) +Achieved 64.76270224904927 GFLOPs +Completed in 0.012652431 s (achieved 8.124921131757208 GB/s) +Achieved 64.74645070184536 GFLOPs +Completed in 0.012654587 s (achieved 8.123536864537737 GB/s) +Achieved 64.73541965454898 GFLOPs +Completed in 0.012665172 s (achieved 8.116747565686435 GB/s) +Achieved 64.68131660588581 GFLOPs +Completed in 0.012658663 s (achieved 8.120921143093863 GB/s) +Achieved 64.71457530704467 GFLOPs +Completed in 0.012648995000000001 s (achieved 8.12712820267539 GB/s) +Achieved 64.76403856590977 GFLOPs +Completed in 0.012654169000000002 s (achieved 8.123805206015502 GB/s) +Achieved 64.7375580332458 GFLOPs +Completed in 0.012653659000000001 s (achieved 8.124132632308172 GB/s) +Achieved 64.74016725122749 GFLOPs +Completed in 0.012656876000000001 s (achieved 8.122067720344262 GB/s) +Achieved 64.72371223357169 GFLOPs +Durations: [0.012651126, 0.012649256000000001, 0.012652431, 0.012654587, 0.012665172, 0.012658663, 0.012648995000000001, 0.012654169000000002, 0.012653659000000001, 0.012656876000000001] +Median duration 0.012654169000000002 (8.123805206015502 GB/s) 1.7815362293893644% of peak. +Median achieved 64.7375580332458 GFLOPs +./sycl_spmm 100000 100000 128 64 k 32 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 128 columns. +Using method k with WG size 32 +OK! +Completed in 0.031660592 s (achieved 4.8640911073298945 GB/s) +Achieved 51.748874436712995 GFLOPs +Completed in 0.03163992 s (achieved 4.867269070212567 GB/s) +Achieved 51.78268465912682 GFLOPs +Completed in 0.031641430000000005 s (achieved 4.867036793216993 GB/s) +Achieved 51.78021347328486 GFLOPs +Completed in 0.031666446 s (achieved 4.863191909821519 GB/s) +Achieved 51.73930790970354 GFLOPs +Completed in 0.031658196 s (achieved 4.864459238296459 GB/s) +Achieved 51.752790967621785 GFLOPs +Completed in 0.031644952000000004 s (achieved 4.866495104811661 GB/s) +Achieved 51.77445047159496 GFLOPs +Completed in 0.031645839 s (achieved 4.866358702008185 GB/s) +Achieved 51.77299928752086 GFLOPs +Completed in 0.031663905 s (achieved 4.863582176614035 GB/s) +Achieved 51.743459942796065 GFLOPs +Completed in 0.031645649000000005 s (achieved 4.866387919552542 GB/s) +Achieved 51.77331013182886 GFLOPs +Completed in 0.031669829000000004 s (achieved 4.862672419229039 GB/s) +Achieved 51.7337810696736 GFLOPs +Durations: [0.031660592, 0.03163992, 0.031641430000000005, 0.031666446, 0.031658196, 0.031644952000000004, 0.031645839, 0.031663905, 0.031645649000000005, 0.031669829000000004] +Median duration 0.031658196 (4.864459238296459 GB/s) 1.0667673768193988% of peak. +Median achieved 51.752790967621785 GFLOPs +./sycl_spmm 100000 100000 256 64 k 32 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 256 columns. +Using method k with WG size 32 +OK! +Completed in 0.061131580000000005 s (achieved 4.194231590284432 GB/s) +Achieved 53.60240975286423 GFLOPs +Completed in 0.061114427000000006 s (achieved 4.195408786210169 GB/s) +Achieved 53.61745435329043 GFLOPs +Completed in 0.061092626000000004 s (achieved 4.196905924456415 GB/s) +Achieved 53.63658782649153 GFLOPs +Completed in 0.061129471000000005 s (achieved 4.194376293555689 GB/s) +Achieved 53.60425906515697 GFLOPs +Completed in 0.061096687000000004 s (achieved 4.196626962768047 GB/s) +Achieved 53.63302268746585 GFLOPs +Completed in 0.061091089 s (achieved 4.19701151505091 GB/s) +Achieved 53.63793727756269 GFLOPs +Completed in 0.061128711 s (achieved 4.194428441326041 GB/s) +Achieved 53.60492551527874 GFLOPs +Completed in 0.061120903000000004 s (achieved 4.194964266152939 GB/s) +Achieved 53.611773373178075 GFLOPs +Completed in 0.061094904000000005 s (achieved 4.1967494375635646 GB/s) +Achieved 53.634587919149524 GFLOPs +Completed in 0.06107387400000001 s (achieved 4.198194534049043 GB/s) +Achieved 53.65305629703463 GFLOPs +Durations: [0.061131580000000005, 0.061114427000000006, 0.061092626000000004, 0.061129471000000005, 0.061096687000000004, 0.061091089, 0.061128711, 0.061120903000000004, 0.061094904000000005, 0.06107387400000001] +Median duration 0.061114427000000006 (4.195408786210169 GB/s) 0.9200457864495986% of peak. +Median achieved 53.61745435329043 GFLOPs +./sycl_spmm 100000 100000 512 64 k 32 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 512 columns. +Using method k with WG size 32 +OK! +Completed in 0.110643872 s (achieved 4.168328490890124 GB/s) +Achieved 59.231477365506514 GFLOPs +Completed in 0.11067484300000001 s (achieved 4.167162035188069 GB/s) +Achieved 59.21490216164119 GFLOPs +Completed in 0.110624312 s (achieved 4.169065512470713 GB/s) +Achieved 59.24195035897715 GFLOPs +Completed in 0.11063402400000001 s (achieved 4.168699531348512 GB/s) +Achieved 59.23674980853991 GFLOPs +Completed in 0.11068918600000001 s (achieved 4.166622058274058 GB/s) +Achieved 59.20722915064169 GFLOPs +Completed in 0.11061155 s (achieved 4.1695465256566795 GB/s) +Achieved 59.24878550205652 GFLOPs +Completed in 0.11073577300000001 s (achieved 4.164869143054612 GB/s) +Achieved 59.18232042322944 GFLOPs +Completed in 0.11063239100000001 s (achieved 4.168761063837082 GB/s) +Achieved 59.23762417825716 GFLOPs +Completed in 0.11059854300000001 s (achieved 4.170036887375632 GB/s) +Achieved 59.25575348673445 GFLOPs +Completed in 0.110632593 s (achieved 4.168753452248923 GB/s) +Achieved 59.23751601844856 GFLOPs +Durations: [0.110643872, 0.11067484300000001, 0.110624312, 0.11063402400000001, 0.11068918600000001, 0.11061155, 0.11073577300000001, 0.11063239100000001, 0.11059854300000001, 0.110632593] +Median duration 0.11063402400000001 (4.168699531348512 GB/s) 0.9141884937167789% of peak. +Median achieved 59.23674980853991 GFLOPs +./sycl_spmm 100000 100000 1 128 k 32 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 1 columns. +Using method k with WG size 32 +OK! +Completed in 0.000539393 s (achieved 192.0677576460948 GB/s) +Achieved 47.4607568136776 GFLOPs +Completed in 0.000540835 s (achieved 191.55565745560108 GB/s) +Achieved 47.3342146865495 GFLOPs +Completed in 0.0005413320000000001 s (achieved 191.37978911278105 GB/s) +Achieved 47.2907568737854 GFLOPs +Completed in 0.000541015 s (achieved 191.49192536251306 GB/s) +Achieved 47.31846621627866 GFLOPs +Completed in 0.000541135 s (achieved 191.44946085542426 GB/s) +Achieved 47.30797305663097 GFLOPs +Completed in 0.0005432960000000001 s (achieved 190.6879564730828 GB/s) +Achieved 47.11980209683119 GFLOPs +Completed in 0.00054031 s (achieved 191.74178527141828 GB/s) +Achieved 47.38020765856638 GFLOPs +Completed in 0.0005408 s (achieved 191.56805473372782 GB/s) +Achieved 47.337278106508876 GFLOPs +Completed in 0.000542791 s (achieved 190.8653680698464 GB/s) +Achieved 47.16364125418439 GFLOPs +Completed in 0.0005411600000000001 s (achieved 191.44061645354424 GB/s) +Achieved 47.3057875674477 GFLOPs +Durations: [0.000539393, 0.000540835, 0.0005413320000000001, 0.000541015, 0.000541135, 0.0005432960000000001, 0.00054031, 0.0005408, 0.000542791, 0.0005411600000000001] +Median duration 0.000541135 (191.44946085542426 GB/s) 41.984530889347425% of peak. +Median achieved 47.30797305663097 GFLOPs +./sycl_spmm 100000 100000 8 128 k 32 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 8 columns. +Using method k with WG size 32 +OK! +Completed in 0.0029256050000000004 s (achieved 37.32561436010671 GB/s) +Achieved 70.00261484376735 GFLOPs +Completed in 0.002924367 s (achieved 37.34141576621539 GB/s) +Achieved 70.03224971421166 GFLOPs +Completed in 0.0029257750000000003 s (achieved 37.3234455827943 GB/s) +Achieved 69.99854739342567 GFLOPs +Completed in 0.002924743 s (achieved 37.3366152171319 GB/s) +Achieved 70.0232464869563 GFLOPs +Completed in 0.002927841 s (achieved 37.29710868862073 GB/s) +Achieved 69.94915365964204 GFLOPs +Completed in 0.0029240670000000002 s (achieved 37.34524687703804 GB/s) +Achieved 70.03943480091256 GFLOPs +Completed in 0.002926679 s (achieved 37.311917022673136 GB/s) +Achieved 69.97692606534574 GFLOPs +Completed in 0.002927589 s (achieved 37.300319136326856 GB/s) +Achieved 69.95517471885569 GFLOPs +Completed in 0.0029260370000000003 s (achieved 37.32010360771241 GB/s) +Achieved 69.99227966016834 GFLOPs +Completed in 0.0029275240000000004 s (achieved 37.30114731766503 GB/s) +Achieved 69.95672793801178 GFLOPs +Durations: [0.0029256050000000004, 0.002924367, 0.0029257750000000003, 0.002924743, 0.002927841, 0.0029240670000000002, 0.002926679, 0.002927589, 0.0029260370000000003, 0.0029275240000000004] +Median duration 0.0029260370000000003 (37.32010360771241 GB/s) 8.184233247305352% of peak. +Median achieved 69.99227966016834 GFLOPs +./sycl_spmm 100000 100000 32 128 k 32 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 32 columns. +Using method k with WG size 32 +OK! +Completed in 0.012207264 s (achieved 10.518327775986496 GB/s) +Achieved 67.10758446773987 GFLOPs +Completed in 0.012212779 s (achieved 10.513577949785223 GB/s) +Achieved 67.07728028158047 GFLOPs +Completed in 0.012210318000000001 s (achieved 10.515696970381935 GB/s) +Achieved 67.09079976459253 GFLOPs +Completed in 0.012205564 s (achieved 10.51979277647473 GB/s) +Achieved 67.11693126184092 GFLOPs +Completed in 0.012210416 s (achieved 10.515612572085997 GB/s) +Achieved 67.09026129822277 GFLOPs +Completed in 0.012211557000000001 s (achieved 10.514630034482908 GB/s) +Achieved 67.08399264729304 GFLOPs +Completed in 0.012209756 s (achieved 10.516180994935526 GB/s) +Achieved 67.09388787130554 GFLOPs +Completed in 0.012202702000000001 s (achieved 10.522260069941886 GB/s) +Achieved 67.13267274739644 GFLOPs +Completed in 0.012202377 s (achieved 10.522540321447208 GB/s) +Achieved 67.13446076940583 GFLOPs +Completed in 0.01220887 s (achieved 10.51694415617498 GB/s) +Achieved 67.09875688741054 GFLOPs +Durations: [0.012207264, 0.012212779, 0.012210318000000001, 0.012205564, 0.012210416, 0.012211557000000001, 0.012209756, 0.012202702000000001, 0.012202377, 0.01220887] +Median duration 0.012209756 (10.516180994935526 GB/s) 2.306180042749019% of peak. +Median achieved 67.09388787130554 GFLOPs +./sycl_spmm 100000 100000 64 128 k 32 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 64 columns. +Using method k with WG size 32 +OK! +Completed in 0.023838138000000002 s (achieved 6.460236281877385 GB/s) +Achieved 68.73020032017601 GFLOPs +Completed in 0.023828775 s (achieved 6.462774691523169 GB/s) +Achieved 68.75720636079699 GFLOPs +Completed in 0.023839540000000003 s (achieved 6.4598563562887525 GB/s) +Achieved 68.72615830674584 GFLOPs +Completed in 0.023828478 s (achieved 6.462855244048739 GB/s) +Achieved 68.75806335595584 GFLOPs +Completed in 0.023835579000000003 s (achieved 6.460929856161664 GB/s) +Achieved 68.73757922977244 GFLOPs +Completed in 0.023848347000000002 s (achieved 6.457470784033794 GB/s) +Achieved 68.70077829712893 GFLOPs +Completed in 0.023830731 s (achieved 6.462244234136166 GB/s) +Achieved 68.75156284547042 GFLOPs +Completed in 0.023834414 s (achieved 6.4612456593226915 GB/s) +Achieved 68.74093904721131 GFLOPs +Completed in 0.023841965000000003 s (achieved 6.459199315157118 GB/s) +Achieved 68.71916807192696 GFLOPs +Completed in 0.023822711 s (achieved 6.464419771536497 GB/s) +Achieved 68.77470830251016 GFLOPs +Durations: [0.023838138000000002, 0.023828775, 0.023839540000000003, 0.023828478, 0.023835579000000003, 0.023848347000000002, 0.023830731, 0.023834414, 0.023841965000000003, 0.023822711] +Median duration 0.023835579000000003 (6.460929856161664 GB/s) 1.416870582491593% of peak. +Median achieved 68.73757922977244 GFLOPs +./sycl_spmm 100000 100000 128 128 k 32 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 128 columns. +Using method k with WG size 32 +OK! +Completed in 0.062985505 s (achieved 3.257892494471546 GB/s) +Achieved 52.02466821532987 GFLOPs +Completed in 0.06300256900000001 s (achieved 3.2570101070005575 GB/s) +Achieved 52.01057753692552 GFLOPs +Completed in 0.063050902 s (achieved 3.254513377144073 GB/s) +Achieved 51.97070773071573 GFLOPs +Completed in 0.063016496 s (achieved 3.256290289450559 GB/s) +Achieved 51.999082906799515 GFLOPs +Completed in 0.06299262 s (achieved 3.2575245163639805 GB/s) +Achieved 52.01879204262341 GFLOPs +Completed in 0.063035667 s (achieved 3.2552999558170774 GB/s) +Achieved 51.983268456570784 GFLOPs +Completed in 0.063018584 s (achieved 3.2561823985127942 GB/s) +Achieved 51.99736001684836 GFLOPs +Completed in 0.063045558 s (achieved 3.2547892430423095 GB/s) +Achieved 51.97511298099701 GFLOPs +Completed in 0.063010973 s (achieved 3.2565757078532975 GB/s) +Achieved 52.003640699216 GFLOPs +Completed in 0.063109094 s (achieved 3.2515124365436145 GB/s) +Achieved 51.92278627863046 GFLOPs +Durations: [0.062985505, 0.06300256900000001, 0.063050902, 0.063016496, 0.06299262, 0.063035667, 0.063018584, 0.063045558, 0.063010973, 0.063109094] +Median duration 0.063018584 (3.2561823985127942 GB/s) 0.7140750873931566% of peak. +Median achieved 51.99736001684836 GFLOPs +./sycl_spmm 100000 100000 256 128 k 32 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 256 columns. +Using method k with WG size 32 +OK! +Completed in 0.121002775 s (achieved 2.542090493379181 GB/s) +Achieved 54.160741354898676 GFLOPs +Completed in 0.121061784 s (achieved 2.540851405262622 GB/s) +Achieved 54.134341849778124 GFLOPs +Completed in 0.12105779600000001 s (achieved 2.540935108384098 GB/s) +Achieved 54.13612519428323 GFLOPs +Completed in 0.121080214 s (achieved 2.5404646542828213 GB/s) +Achieved 54.12610189142877 GFLOPs +Completed in 0.12105903900000001 s (achieved 2.5409090187804977 GB/s) +Achieved 54.13556933984913 GFLOPs +Completed in 0.121022136 s (achieved 2.5416838122903402 GB/s) +Achieved 54.15207677378955 GFLOPs +Completed in 0.12103975200000001 s (achieved 2.5413138982637706 GB/s) +Achieved 54.14419553668616 GFLOPs +Completed in 0.121105638 s (achieved 2.5399313283829117 GB/s) +Achieved 54.11473906772202 GFLOPs +Completed in 0.121036661 s (achieved 2.5413787976190125 GB/s) +Achieved 54.1455782558311 GFLOPs +Completed in 0.121012855 s (achieved 2.5418787450308487 GB/s) +Achieved 54.15622993110939 GFLOPs +Durations: [0.121002775, 0.121061784, 0.12105779600000001, 0.121080214, 0.12105903900000001, 0.121022136, 0.12103975200000001, 0.121105638, 0.121036661, 0.121012855] +Median duration 0.12105779600000001 (2.540935108384098 GB/s) 0.5572226114877408% of peak. +Median achieved 54.13612519428323 GFLOPs +./sycl_spmm 100000 100000 512 128 k 32 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 512 columns. +Using method k with WG size 32 +OK! +Completed in 0.21434212000000002 s (achieved 2.390570756694951 GB/s) +Achieved 61.150836802397954 GFLOPs +Completed in 0.214441972 s (achieved 2.389457619798423 GB/s) +Achieved 61.12236274342786 GFLOPs +Completed in 0.214434053 s (achieved 2.389545861915878 GB/s) +Achieved 61.124619978152445 GFLOPs +Completed in 0.214292803 s (achieved 2.3911209187925926 GB/s) +Achieved 61.16490995733534 GFLOPs +Completed in 0.214384959 s (achieved 2.3900930661838085 GB/s) +Achieved 61.13861747176023 GFLOPs +Completed in 0.214331338 s (achieved 2.390691015048859 GB/s) +Achieved 61.15391301294447 GFLOPs +Completed in 0.21440762400000002 s (achieved 2.3898404097794583 GB/s) +Achieved 61.13215451704273 GFLOPs +Completed in 0.214352599 s (achieved 2.390453889481415 GB/s) +Achieved 61.14784733727441 GFLOPs +Completed in 0.214377164 s (achieved 2.390179972713885 GB/s) +Achieved 61.14084054213909 GFLOPs +Completed in 0.214307837 s (achieved 2.3909531782545126 GB/s) +Achieved 61.16061915178585 GFLOPs +Durations: [0.21434212000000002, 0.214441972, 0.214434053, 0.214292803, 0.214384959, 0.214331338, 0.21440762400000002, 0.214352599, 0.214377164, 0.214307837] +Median duration 0.214377164 (2.390179972713885 GB/s) 0.5241622747179572% of peak. +Median achieved 61.14084054213909 GFLOPs +./sycl_spmm 100000 100000 1 16 k 64 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 1 columns. +Using method k with WG size 64 +Mismatch at index 3961: SYCL result = 1121.7, Reference = 779.418 +Error! +./sycl_spmm 100000 100000 8 16 k 64 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 8 columns. +Using method k with WG size 64 +Mismatch at index 32096: SYCL result = 870.313, Reference = 528.036 +Error! +./sycl_spmm 100000 100000 32 16 k 64 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 32 columns. +Using method k with WG size 64 +Mismatch at index 127872: SYCL result = 1089.68, Reference = 747.401 +Error! +./sycl_spmm 100000 100000 64 16 k 64 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 64 columns. +Using method k with WG size 64 +Mismatch at index 269184: SYCL result = 1010.67, Reference = 668.393 +Error! +./sycl_spmm 100000 100000 128 16 k 64 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 128 columns. +Using method k with WG size 64 +Mismatch at index 520704: SYCL result = 1204.68, Reference = 862.398 +Error! +./sycl_spmm 100000 100000 256 16 k 64 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 256 columns. +Using method k with WG size 64 +Mismatch at index 1033728: SYCL result = 1594.71, Reference = 1252.43 +Error! +./sycl_spmm 100000 100000 512 16 k 64 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 512 columns. +Using method k with WG size 64 +Mismatch at index 2174464: SYCL result = 1202.13, Reference = 859.853 +Error! +./sycl_spmm 100000 100000 1 32 k 64 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 1 columns. +Using method k with WG size 64 +Mismatch at index 641: SYCL result = 1684.49, Reference = 1568.29 +Error! +./sycl_spmm 100000 100000 8 32 k 64 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 8 columns. +Using method k with WG size 64 +Mismatch at index 5128: SYCL result = 1736.61, Reference = 1568.29 +Error! +./sycl_spmm 100000 100000 32 32 k 64 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 32 columns. +Using method k with WG size 64 +Mismatch at index 20512: SYCL result = 1964.9, Reference = 1568.29 +Error! +./sycl_spmm 100000 100000 64 32 k 64 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 64 columns. +Using method k with WG size 64 +Mismatch at index 41664: SYCL result = 2312.79, Reference = 1749.93 +Error! +./sycl_spmm 100000 100000 128 32 k 64 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 128 columns. +Using method k with WG size 64 +Mismatch at index 82048: SYCL result = 1840.52, Reference = 1568.29 +Error! +./sycl_spmm 100000 100000 256 32 k 64 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 256 columns. +Using method k with WG size 64 +Mismatch at index 167168: SYCL result = 2105.95, Reference = 1509.63 +Error! +./sycl_spmm 100000 100000 512 32 k 64 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 512 columns. +Using method k with WG size 64 +Mismatch at index 331264: SYCL result = 1797.44, Reference = 1117.99 +Error! +./sycl_spmm 100000 100000 1 64 k 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 1 columns. +Using method k with WG size 64 +Mismatch at index 1: SYCL result = 4712.37, Reference = 3308.82 +Error! +./sycl_spmm 100000 100000 8 64 k 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 8 columns. +Using method k with WG size 64 +Mismatch at index 8: SYCL result = 4712.37, Reference = 3308.82 +Error! +./sycl_spmm 100000 100000 32 64 k 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 32 columns. +Using method k with WG size 64 +Mismatch at index 32: SYCL result = 4712.37, Reference = 3308.82 +Error! +./sycl_spmm 100000 100000 64 64 k 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 64 columns. +Using method k with WG size 64 +Mismatch at index 64: SYCL result = 4712.37, Reference = 3308.82 +Error! +./sycl_spmm 100000 100000 128 64 k 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 128 columns. +Using method k with WG size 64 +Mismatch at index 128: SYCL result = 4712.37, Reference = 3308.82 +Error! +./sycl_spmm 100000 100000 256 64 k 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 256 columns. +Using method k with WG size 64 +Mismatch at index 256: SYCL result = 4712.37, Reference = 3308.82 +Error! +./sycl_spmm 100000 100000 512 64 k 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 512 columns. +Using method k with WG size 64 +Mismatch at index 512: SYCL result = 4712.37, Reference = 3308.82 +Error! +./sycl_spmm 100000 100000 1 128 k 64 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 1 columns. +Using method k with WG size 64 +Mismatch at index 4: SYCL result = 9117.94, Reference = 7202.75 +Error! +./sycl_spmm 100000 100000 8 128 k 64 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 8 columns. +Using method k with WG size 64 +Mismatch at index 32: SYCL result = 9117.94, Reference = 7202.75 +Error! +./sycl_spmm 100000 100000 32 128 k 64 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 32 columns. +Using method k with WG size 64 +Mismatch at index 128: SYCL result = 9117.94, Reference = 7202.75 +Error! +./sycl_spmm 100000 100000 64 128 k 64 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 64 columns. +Using method k with WG size 64 +Mismatch at index 256: SYCL result = 9117.94, Reference = 7202.75 +Error! +./sycl_spmm 100000 100000 128 128 k 64 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 128 columns. +Using method k with WG size 64 +Mismatch at index 512: SYCL result = 9117.94, Reference = 7202.75 +Error! +./sycl_spmm 100000 100000 256 128 k 64 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 256 columns. +Using method k with WG size 64 +Mismatch at index 1024: SYCL result = 9117.94, Reference = 7202.75 +Error! +./sycl_spmm 100000 100000 512 128 k 64 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 512 columns. +Using method k with WG size 64 +Mismatch at index 2048: SYCL result = 9117.94, Reference = 7202.75 +Error! +./sycl_spmm 100000 100000 1 16 k 128 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 1 columns. +Using method k with WG size 128 +Mismatch at index 3537: SYCL result = 1173.01, Reference = 830.735 +Error! +./sycl_spmm 100000 100000 8 16 k 128 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 8 columns. +Using method k with WG size 128 +Mismatch at index 29296: SYCL result = 1407.69, Reference = 1065.41 +Error! +./sycl_spmm 100000 100000 32 16 k 128 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 32 columns. +Using method k with WG size 128 +Mismatch at index 116416: SYCL result = 897.892, Reference = 555.614 +Error! +./sycl_spmm 100000 100000 64 16 k 128 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 64 columns. +Using method k with WG size 128 +Mismatch at index 237184: SYCL result = 1064.44, Reference = 722.167 +Error! +./sycl_spmm 100000 100000 128 16 k 128 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 128 columns. +Using method k with WG size 128 +Mismatch at index 483840: SYCL result = 1428.88, Reference = 1086.6 +Error! +./sycl_spmm 100000 100000 256 16 k 128 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 256 columns. +Using method k with WG size 128 +Mismatch at index 943616: SYCL result = 824.614, Reference = 482.337 +Error! +./sycl_spmm 100000 100000 512 16 k 128 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 512 columns. +Using method k with WG size 128 +Mismatch at index 1948672: SYCL result = 1345.45, Reference = 1003.17 +Error! +./sycl_spmm 100000 100000 1 32 k 128 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 1 columns. +Using method k with WG size 128 +Mismatch at index 321: SYCL result = 1662.49, Reference = 1300.21 +Error! +./sycl_spmm 100000 100000 8 32 k 128 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 8 columns. +Using method k with WG size 128 +Mismatch at index 2592: SYCL result = 1370.41, Reference = 1246.84 +Error! +./sycl_spmm 100000 100000 32 32 k 128 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 32 columns. +Using method k with WG size 128 +Mismatch at index 10272: SYCL result = 1662.49, Reference = 1300.21 +Error! +./sycl_spmm 100000 100000 64 32 k 128 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 64 columns. +Using method k with WG size 128 +Mismatch at index 20928: SYCL result = 1421.53, Reference = 1257.24 +Error! +./sycl_spmm 100000 100000 128 32 k 128 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 128 columns. +Using method k with WG size 128 +Mismatch at index 41088: SYCL result = 1572.44, Reference = 1300.21 +Error! +./sycl_spmm 100000 100000 256 32 k 128 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 256 columns. +Using method k with WG size 128 +Mismatch at index 82944: SYCL result = 1526.69, Reference = 1246.84 +Error! +./sycl_spmm 100000 100000 512 32 k 128 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 512 columns. +Using method k with WG size 128 +Mismatch at index 168448: SYCL result = 1361.15, Reference = 1196.85 +Error! +./sycl_spmm 100000 100000 1 64 k 128 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 1 columns. +Using method k with WG size 128 +Mismatch at index 321: SYCL result = 3309.85, Reference = 3130.25 +Error! +./sycl_spmm 100000 100000 8 64 k 128 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 8 columns. +Using method k with WG size 128 +Mismatch at index 2568: SYCL result = 3424.88, Reference = 3130.25 +Error! +./sycl_spmm 100000 100000 32 64 k 128 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 32 columns. +Using method k with WG size 128 +Mismatch at index 10304: SYCL result = 2966.43, Reference = 2950.58 +Error! +./sycl_spmm 100000 100000 64 64 k 128 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 64 columns. +Using method k with WG size 128 +Mismatch at index 20800: SYCL result = 2852.05, Reference = 2753.03 +Error! +./sycl_spmm 100000 100000 128 64 k 128 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 128 columns. +Using method k with WG size 128 +Mismatch at index 41088: SYCL result = 3248.17, Reference = 3130.25 +Error! +./sycl_spmm 100000 100000 256 64 k 128 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 256 columns. +Using method k with WG size 128 +Mismatch at index 83200: SYCL result = 3029.93, Reference = 2753.03 +Error! +./sycl_spmm 100000 100000 512 64 k 128 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 512 columns. +Using method k with WG size 128 +Mismatch at index 164864: SYCL result = 3345.96, Reference = 2950.58 +Error! +./sycl_spmm 100000 100000 1 128 k 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 1 columns. +Using method k with WG size 128 +Mismatch at index 4: SYCL result = 12581.7, Reference = 7202.75 +Error! +./sycl_spmm 100000 100000 8 128 k 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 8 columns. +Using method k with WG size 128 +Mismatch at index 32: SYCL result = 12581.7, Reference = 7202.75 +Error! +./sycl_spmm 100000 100000 32 128 k 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 32 columns. +Using method k with WG size 128 +Mismatch at index 128: SYCL result = 12581.7, Reference = 7202.75 +Error! +./sycl_spmm 100000 100000 64 128 k 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 64 columns. +Using method k with WG size 128 +Mismatch at index 256: SYCL result = 12581.7, Reference = 7202.75 +Error! +./sycl_spmm 100000 100000 128 128 k 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 128 columns. +Using method k with WG size 128 +Mismatch at index 512: SYCL result = 12581.7, Reference = 7202.75 +Error! +./sycl_spmm 100000 100000 256 128 k 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 256 columns. +Using method k with WG size 128 +Mismatch at index 1024: SYCL result = 12581.7, Reference = 7202.75 +Error! +./sycl_spmm 100000 100000 512 128 k 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 512 columns. +Using method k with WG size 128 +Mismatch at index 2048: SYCL result = 12581.7, Reference = 7202.75 +Error! +./sycl_spmm 100000 100000 1 16 k 256 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 1 columns. +Using method k with WG size 256 +Mismatch at index 3526: SYCL result = 1162.15, Reference = 819.869 +Error! +./sycl_spmm 100000 100000 8 16 k 256 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 8 columns. +Using method k with WG size 256 +Mismatch at index 28984: SYCL result = 807.465, Reference = 465.187 +Error! +./sycl_spmm 100000 100000 32 16 k 256 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 32 columns. +Using method k with WG size 256 +Mismatch at index 114144: SYCL result = 1211.26, Reference = 868.987 +Error! +./sycl_spmm 100000 100000 64 16 k 256 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 64 columns. +Using method k with WG size 256 +Mismatch at index 225344: SYCL result = 757.804, Reference = 415.527 +Error! +./sycl_spmm 100000 100000 128 16 k 256 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 128 columns. +Using method k with WG size 256 +Mismatch at index 452736: SYCL result = 1173.01, Reference = 830.735 +Error! +./sycl_spmm 100000 100000 256 16 k 256 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 256 columns. +Using method k with WG size 256 +Mismatch at index 906496: SYCL result = 980.613, Reference = 638.336 +Error! +./sycl_spmm 100000 100000 512 16 k 256 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 512 columns. +Using method k with WG size 256 +Mismatch at index 1829376: SYCL result = 899.039, Reference = 556.762 +Error! +./sycl_spmm 100000 100000 1 32 k 256 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 1 columns. +Using method k with WG size 256 +Mismatch at index 160: SYCL result = 2201.33, Reference = 1583.33 +Error! +./sycl_spmm 100000 100000 8 32 k 256 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 8 columns. +Using method k with WG size 256 +Mismatch at index 1288: SYCL result = 1572.88, Reference = 1403.33 +Error! +./sycl_spmm 100000 100000 32 32 k 256 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 32 columns. +Using method k with WG size 256 +Mismatch at index 5120: SYCL result = 2058.24, Reference = 1583.33 +Error! +./sycl_spmm 100000 100000 64 32 k 256 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 64 columns. +Using method k with WG size 256 +Mismatch at index 10880: SYCL result = 1446.4, Reference = 1354.84 +Error! +./sycl_spmm 100000 100000 128 32 k 256 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 128 columns. +Using method k with WG size 256 +Mismatch at index 20864: SYCL result = 1519.96, Reference = 1257.21 +Error! +./sycl_spmm 100000 100000 256 32 k 256 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 256 columns. +Using method k with WG size 256 +Mismatch at index 40960: SYCL result = 1657.92, Reference = 1583.33 +Error! +./sycl_spmm 100000 100000 512 32 k 256 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 512 columns. +Using method k with WG size 256 +Mismatch at index 81920: SYCL result = 1752.88, Reference = 1583.33 +Error! +./sycl_spmm 100000 100000 1 64 k 256 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 1 columns. +Using method k with WG size 256 +Mismatch at index 161: SYCL result = 3917.13, Reference = 2978.64 +Error! +./sycl_spmm 100000 100000 8 64 k 256 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 8 columns. +Using method k with WG size 256 +Mismatch at index 1288: SYCL result = 3034.37, Reference = 2978.64 +Error! +./sycl_spmm 100000 100000 32 64 k 256 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 32 columns. +Using method k with WG size 256 +Mismatch at index 5248: SYCL result = 3301.97, Reference = 2925.94 +Error! +./sycl_spmm 100000 100000 64 64 k 256 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 64 columns. +Using method k with WG size 256 +Mismatch at index 10368: SYCL result = 3575.04, Reference = 3395.45 +Error! +./sycl_spmm 100000 100000 128 64 k 256 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 128 columns. +Using method k with WG size 256 +Mismatch at index 20736: SYCL result = 3759.5, Reference = 3395.45 +Error! +./sycl_spmm 100000 100000 256 64 k 256 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 256 columns. +Using method k with WG size 256 +Mismatch at index 41728: SYCL result = 2792.14, Reference = 2661.68 +Error! +./sycl_spmm 100000 100000 512 64 k 256 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 512 columns. +Using method k with WG size 256 +Mismatch at index 82944: SYCL result = 3628.34, Reference = 3395.45 +Error! +./sycl_spmm 100000 100000 1 128 k 256 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 1 columns. +Using method k with WG size 256 +Mismatch at index 163: SYCL result = 6546.47, Reference = 5830.16 +Error! +./sycl_spmm 100000 100000 8 128 k 256 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 8 columns. +Using method k with WG size 256 +Mismatch at index 1296: SYCL result = 7563.46, Reference = 6767.21 +Error! +./sycl_spmm 100000 100000 32 128 k 256 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 32 columns. +Using method k with WG size 256 +Mismatch at index 5152: SYCL result = 6015.98, Reference = 5826.36 +Error! +./sycl_spmm 100000 100000 64 128 k 256 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 64 columns. +Using method k with WG size 256 +Mismatch at index 10304: SYCL result = 7756.48, Reference = 5826.36 +Error! +./sycl_spmm 100000 100000 128 128 k 256 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 128 columns. +Using method k with WG size 256 +Mismatch at index 20608: SYCL result = 6012, Reference = 5826.36 +Error! +./sycl_spmm 100000 100000 256 128 k 256 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 256 columns. +Using method k with WG size 256 +Mismatch at index 41472: SYCL result = 7003.54, Reference = 6767.21 +Error! +./sycl_spmm 100000 100000 512 128 k 256 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 512 columns. +Using method k with WG size 256 +Mismatch at index 83968: SYCL result = 6911.27, Reference = 6184.16 +Error! +./sycl_spmm 100000 100000 1 16 j 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 1 columns. +Using method j with WG size 16 +OK! +Completed in 0.001105567 s (achieved 12.663189114725748 GB/s) +Achieved 2.894442399239485 GFLOPs +Completed in 0.0011078820000000001 s (achieved 12.636728460251183 GB/s) +Achieved 2.888394251373341 GFLOPs +Completed in 0.0011053430000000002 s (achieved 12.665755335674085 GB/s) +Achieved 2.895028963860087 GFLOPs +Completed in 0.001103147 s (achieved 12.690968656035869 GB/s) +Achieved 2.900792006867625 GFLOPs +Completed in 0.001107446 s (achieved 12.641703523241766 GB/s) +Achieved 2.88953140830343 GFLOPs +Completed in 0.001102748 s (achieved 12.695560545110943 GB/s) +Achieved 2.901841581213478 GFLOPs +Completed in 0.001109186 s (achieved 12.621872255870523 GB/s) +Achieved 2.8849985484851053 GFLOPs +Completed in 0.001110276 s (achieved 12.609480885833793 GB/s) +Achieved 2.882166236143085 GFLOPs +Completed in 0.0011089930000000002 s (achieved 12.62406886247253 GB/s) +Achieved 2.885500629850684 GFLOPs +Completed in 0.001110253 s (achieved 12.609742103826786 GB/s) +Achieved 2.882225943095853 GFLOPs +Durations: [0.001105567, 0.0011078820000000001, 0.0011053430000000002, 0.001103147, 0.001107446, 0.001102748, 0.001109186, 0.001110276, 0.0011089930000000002, 0.001110253] +Median duration 0.0011078820000000001 (12.636728460251183 GB/s) 2.7712123816340313% of peak. +Median achieved 2.888394251373341 GFLOPs +./sycl_spmm 100000 100000 8 16 j 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 8 columns. +Using method j with WG size 16 +OK! +Completed in 0.000317781 s (achieved 61.677708862392656 GB/s) +Achieved 80.55862370626312 GFLOPs +Completed in 0.00031667800000000003 s (achieved 61.89253437245404 GB/s) +Achieved 80.83921207030485 GFLOPs +Completed in 0.000318339 s (achieved 61.56959719041651 GB/s) +Achieved 80.41741665331612 GFLOPs +Completed in 0.000318061 s (achieved 61.623411861246744 GB/s) +Achieved 80.48770518862734 GFLOPs +Completed in 0.000316481 s (achieved 61.931060632391834 GB/s) +Achieved 80.88953207301545 GFLOPs +Completed in 0.00031800400000000004 s (achieved 61.63445742820844 GB/s) +Achieved 80.50213204865346 GFLOPs +Completed in 0.00031757000000000004 s (achieved 61.718688793022004 GB/s) +Achieved 80.61214850269232 GFLOPs +Completed in 0.000316394 s (achieved 61.94809003963413 GB/s) +Achieved 80.91177455956813 GFLOPs +Completed in 0.000317774 s (achieved 61.679067513390024 GB/s) +Achieved 80.56039827046895 GFLOPs +Completed in 0.000318008 s (achieved 61.6336821715177 GB/s) +Achieved 80.50111946869262 GFLOPs +Durations: [0.000317781, 0.00031667800000000003, 0.000318339, 0.000318061, 0.000316481, 0.00031800400000000004, 0.00031757000000000004, 0.000316394, 0.000317774, 0.000318008] +Median duration 0.000317781 (61.677708862392656 GB/s) 13.525813347015932% of peak. +Median achieved 80.55862370626312 GFLOPs +./sycl_spmm 100000 100000 32 16 j 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 32 columns. +Using method j with WG size 16 +OK! +Completed in 0.000530258 s (achieved 73.17193517118082 GB/s) +Achieved 193.11354095553486 GFLOPs +Completed in 0.000530134 s (achieved 73.18905031558059 GB/s) +Achieved 193.15871081651053 GFLOPs +Completed in 0.000531318 s (achieved 73.02595432490523 GB/s) +Achieved 192.72827195766 GFLOPs +Completed in 0.000530374 s (achieved 73.15593147477063 GB/s) +Achieved 193.07130440029113 GFLOPs +Completed in 0.00053147 s (achieved 73.00506895967787 GB/s) +Achieved 192.67315182418574 GFLOPs +Completed in 0.0005302530000000001 s (achieved 73.17262514309206 GB/s) +Achieved 193.1153619121438 GFLOPs +Completed in 0.000530704 s (achieved 73.11044197895625 GB/s) +Achieved 192.9512496608279 GFLOPs +Completed in 0.000532335 s (achieved 72.88644180825983 GB/s) +Achieved 192.3600740135441 GFLOPs +Completed in 0.000531407 s (achieved 73.01372394417085 GB/s) +Achieved 192.69599384276083 GFLOPs +Completed in 0.000530451 s (achieved 73.1453121966025 GB/s) +Achieved 193.04327826698412 GFLOPs +Durations: [0.000530258, 0.000530134, 0.000531318, 0.000530374, 0.00053147, 0.0005302530000000001, 0.000530704, 0.000532335, 0.000531407, 0.000530451] +Median duration 0.000530704 (73.11044197895625 GB/s) 16.032991662051806% of peak. +Median achieved 192.9512496608279 GFLOPs +./sycl_spmm 100000 100000 64 16 j 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 64 columns. +Using method j with WG size 16 +OK! +Completed in 0.0009458330000000001 s (achieved 68.08813395176527 GB/s) +Achieved 216.52871067091124 GFLOPs +Completed in 0.000943934 s (achieved 68.22511319647349 GB/s) +Achieved 216.96432165808204 GFLOPs +Completed in 0.0009435240000000001 s (achieved 68.25475981533062 GB/s) +Achieved 217.0586015830016 GFLOPs +Completed in 0.000944391 s (achieved 68.19209839992122 GB/s) +Achieved 216.85933051035005 GFLOPs +Completed in 0.0009449100000000001 s (achieved 68.15464329936185 GB/s) +Achieved 216.74021864516197 GFLOPs +Completed in 0.000945043 s (achieved 68.14505160082663 GB/s) +Achieved 216.70971585419923 GFLOPs +Completed in 0.000943935 s (achieved 68.22504091913109 GB/s) +Achieved 216.96409180716893 GFLOPs +Completed in 0.000945624 s (achieved 68.10318266033858 GB/s) +Achieved 216.57656743060667 GFLOPs +Completed in 0.000944335 s (achieved 68.19614225883824 GB/s) +Achieved 216.8721904832501 GFLOPs +Completed in 0.000945513 s (achieved 68.11117774160695 GB/s) +Achieved 216.60199278063865 GFLOPs +Durations: [0.0009458330000000001, 0.000943934, 0.0009435240000000001, 0.000944391, 0.0009449100000000001, 0.000945043, 0.000943935, 0.000945624, 0.000944335, 0.000945513] +Median duration 0.0009449100000000001 (68.15464329936185 GB/s) 14.946193706000406% of peak. +Median achieved 216.74021864516197 GFLOPs +./sycl_spmm 100000 100000 128 16 j 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 128 columns. +Using method j with WG size 16 +OK! +Completed in 0.001790481 s (achieved 64.56365859230006 GB/s) +Achieved 228.7653429441586 GFLOPs +Completed in 0.0017915040000000002 s (achieved 64.52679089748055 GB/s) +Achieved 228.63471139333208 GFLOPs +Completed in 0.0017916050000000002 s (achieved 64.52315326201925 GB/s) +Achieved 228.6218223324896 GFLOPs +Completed in 0.0017916590000000001 s (achieved 64.52120855586917 GB/s) +Achieved 228.61493174761492 GFLOPs +Completed in 0.0017922340000000002 s (achieved 64.50050830416117 GB/s) +Achieved 228.54158552956812 GFLOPs +Completed in 0.0017932570000000001 s (achieved 64.46371267475882 GB/s) +Achieved 228.41120932470918 GFLOPs +Completed in 0.001791784 s (achieved 64.51670737097776 GB/s) +Achieved 228.598982913119 GFLOPs +Completed in 0.0017932890000000002 s (achieved 64.46256236445994 GB/s) +Achieved 228.4071334848984 GFLOPs +Completed in 0.001792481 s (achieved 64.49162027379927 GB/s) +Achieved 228.5100929940122 GFLOPs +Completed in 0.0017923420000000002 s (achieved 64.49662173848517 GB/s) +Achieved 228.5278144461269 GFLOPs +Durations: [0.001790481, 0.0017915040000000002, 0.0017916050000000002, 0.0017916590000000001, 0.0017922340000000002, 0.0017932570000000001, 0.001791784, 0.0017932890000000002, 0.001792481, 0.0017923420000000002] +Median duration 0.0017922340000000002 (64.50050830416117 GB/s) 14.144848312316046% of peak. +Median achieved 228.54158552956812 GFLOPs +./sycl_spmm 100000 100000 256 16 j 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 256 columns. +Using method j with WG size 16 +OK! +Completed in 0.00355288 s (achieved 61.358673526828944 GB/s) +Achieved 230.5735065636892 GFLOPs +Completed in 0.0035493390000000003 s (achieved 61.419888041125404 GB/s) +Achieved 230.80353834897144 GFLOPs +Completed in 0.003551554 s (achieved 61.38158225948416 GB/s) +Achieved 230.659592955647 GFLOPs +Completed in 0.0035505640000000004 s (achieved 61.39869722106122 GB/s) +Achieved 230.7239075256776 GFLOPs +Completed in 0.003552392 s (achieved 61.36710250445334 GB/s) +Achieved 230.60518095975894 GFLOPs +Completed in 0.003553937 s (achieved 61.340424436336384 GB/s) +Achieved 230.5049301661791 GFLOPs +Completed in 0.0035513890000000003 s (achieved 61.384434090436166 GB/s) +Achieved 230.6703095605691 GFLOPs +Completed in 0.0035520380000000004 s (achieved 61.37321841714531 GB/s) +Achieved 230.62816332482927 GFLOPs +Completed in 0.0035519230000000002 s (achieved 61.37520548728112 GB/s) +Achieved 230.6356303332026 GFLOPs +Completed in 0.0035501240000000004 s (achieved 61.40630693463102 GB/s) +Achieved 230.7525032928427 GFLOPs +Durations: [0.00355288, 0.0035493390000000003, 0.003551554, 0.0035505640000000004, 0.003552392, 0.003553937, 0.0035513890000000003, 0.0035520380000000004, 0.0035519230000000002, 0.0035501240000000004] +Median duration 0.0035519230000000002 (61.37520548728112 GB/s) 13.45947488756165% of peak. +Median achieved 230.6356303332026 GFLOPs +./sycl_spmm 100000 100000 512 16 j 16 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 512 columns. +Using method j with WG size 16 +OK! +Completed in 0.007071722000000001 s (achieved 59.78741867963701 GB/s) +Achieved 231.6833156054494 GFLOPs +Completed in 0.007071709000000001 s (achieved 59.78752858750268 GB/s) +Achieved 231.68374151142248 GFLOPs +Completed in 0.00707348 s (achieved 59.77255947567534 GB/s) +Achieved 231.62573443340477 GFLOPs +Completed in 0.007068305 s (achieved 59.816321451889806 GB/s) +Achieved 231.7953172648888 GFLOPs +Completed in 0.007069446 s (achieved 59.80666717024219 GB/s) +Achieved 231.7579057821504 GFLOPs +Completed in 0.007068092000000001 s (achieved 59.81812404252802 GB/s) +Achieved 231.80230251671878 GFLOPs +Completed in 0.007068527000000001 s (achieved 59.814442811069405 GB/s) +Achieved 231.788037309612 GFLOPs +Completed in 0.007067581000000001 s (achieved 59.822449010488874 GB/s) +Achieved 231.8190622788759 GFLOPs +Completed in 0.007070425000000001 s (achieved 59.79838609418811 GB/s) +Achieved 231.72581563343078 GFLOPs +Completed in 0.007068009 s (achieved 59.8188264898927 GB/s) +Achieved 231.80502458330204 GFLOPs +Durations: [0.007071722000000001, 0.007071709000000001, 0.00707348, 0.007068305, 0.007069446, 0.007068092000000001, 0.007068527000000001, 0.007067581000000001, 0.007070425000000001, 0.007068009] +Median duration 0.007069446 (59.80666717024219 GB/s) 13.11549718645662% of peak. +Median achieved 231.7579057821504 GFLOPs +./sycl_spmm 100000 100000 1 32 j 16 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 1 columns. +Using method j with WG size 16 +OK! +Completed in 0.002288123 s (achieved 11.71265880374438 GB/s) +Achieved 2.7970524311848624 GFLOPs +Completed in 0.002275432 s (achieved 11.777985015592645 GB/s) +Achieved 2.8126527182530614 GFLOPs +Completed in 0.002299301 s (achieved 11.655717976898197 GB/s) +Achieved 2.7834546238182822 GFLOPs +Completed in 0.00227742 s (achieved 11.767703805183059 GB/s) +Achieved 2.810197504193342 GFLOPs +Completed in 0.002272227 s (achieved 11.79459798690888 GB/s) +Achieved 2.8166199943931662 GFLOPs +Completed in 0.002274618 s (achieved 11.782199912249004 GB/s) +Achieved 2.813659260587932 GFLOPs +Completed in 0.002279306 s (achieved 11.757966679331341 GB/s) +Achieved 2.807872220754914 GFLOPs +Completed in 0.002286539 s (achieved 11.720772748682617 GB/s) +Achieved 2.7989900893883726 GFLOPs +Completed in 0.0022824390000000003 s (achieved 11.741827054304627 GB/s) +Achieved 2.804017982517824 GFLOPs +Completed in 0.002287035 s (achieved 11.718230809760236 GB/s) +Achieved 2.798383059288555 GFLOPs +Durations: [0.002288123, 0.002275432, 0.002299301, 0.00227742, 0.002272227, 0.002274618, 0.002279306, 0.002286539, 0.0022824390000000003, 0.002287035] +Median duration 0.0022824390000000003 (11.741827054304627 GB/s) 2.574962073312418% of peak. +Median achieved 2.804017982517824 GFLOPs +./sycl_spmm 100000 100000 8 32 j 16 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 8 columns. +Using method j with WG size 16 +OK! +Completed in 0.0005996500000000001 s (achieved 54.03152505628283 GB/s) +Achieved 85.3831401650963 GFLOPs +Completed in 0.0005978330000000001 s (achieved 54.19574362740096 GB/s) +Achieved 85.64264602322052 GFLOPs +Completed in 0.000601452 s (achieved 53.86964213270552 GB/s) +Achieved 85.127325206334 GFLOPs +Completed in 0.0005982860000000001 s (achieved 54.15470861761766 GB/s) +Achieved 85.57780058366734 GFLOPs +Completed in 0.0005983690000000001 s (achieved 54.147196796625494 GB/s) +Achieved 85.56593005319459 GFLOPs +Completed in 0.000599978 s (achieved 54.00198673951378 GB/s) +Achieved 85.33646233695235 GFLOPs +Completed in 0.000600329 s (achieved 53.97041289026517 GB/s) +Achieved 85.2865678652872 GFLOPs +Completed in 0.0005996630000000001 s (achieved 54.03035371533678 GB/s) +Achieved 85.38128915741008 GFLOPs +Completed in 0.0005993480000000001 s (achieved 54.05875050888632 GB/s) +Achieved 85.42616309723232 GFLOPs +Completed in 0.0006000350000000001 s (achieved 53.99685685001708 GB/s) +Achieved 85.32835584590899 GFLOPs +Durations: [0.0005996500000000001, 0.0005978330000000001, 0.000601452, 0.0005982860000000001, 0.0005983690000000001, 0.000599978, 0.000600329, 0.0005996630000000001, 0.0005993480000000001, 0.0006000350000000001] +Median duration 0.0005996630000000001 (54.03035371533678 GB/s) 11.848761779679117% of peak. +Median achieved 85.38128915741008 GFLOPs +./sycl_spmm 100000 100000 32 32 j 16 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 32 columns. +Using method j with WG size 16 +OK! +Completed in 0.001016668 s (achieved 50.754035732412156 GB/s) +Achieved 201.44235876411966 GFLOPs +Completed in 0.001015696 s (achieved 50.80260629164633 GB/s) +Achieved 201.63513492226022 GFLOPs +Completed in 0.001016956 s (achieved 50.73966228627394 GB/s) +Achieved 201.38531067224147 GFLOPs +Completed in 0.001014443 s (achieved 50.86535566808584 GB/s) +Achieved 201.88418669161302 GFLOPs +Completed in 0.0010158110000000001 s (achieved 50.796854926753106 GB/s) +Achieved 201.61230780135278 GFLOPs +Completed in 0.001014803 s (achieved 50.8473112515434 GB/s) +Achieved 201.8125685477871 GFLOPs +Completed in 0.001014431 s (achieved 50.865957369205 GB/s) +Achieved 201.8865748385055 GFLOPs +Completed in 0.001014512 s (achieved 50.86189616288423 GB/s) +Achieved 201.87045594335012 GFLOPs +Completed in 0.001015143 s (achieved 50.83028105399929 GB/s) +Achieved 201.7449758309913 GFLOPs +Completed in 0.001016638 s (achieved 50.75553343471324 GB/s) +Achieved 201.44830313248175 GFLOPs +Durations: [0.001016668, 0.001015696, 0.001016956, 0.001014443, 0.0010158110000000001, 0.001014803, 0.001014431, 0.001014512, 0.001015143, 0.001016638] +Median duration 0.001015696 (50.80260629164633 GB/s) 11.14092243237858% of peak. +Median achieved 201.63513492226022 GFLOPs +./sycl_spmm 100000 100000 64 32 j 16 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 64 columns. +Using method j with WG size 16 +OK! +Completed in 0.001838734 s (achieved 41.98541170174696 GB/s) +Achieved 222.76196556978877 GFLOPs +Completed in 0.0018384520000000002 s (achieved 41.99185184057021 GB/s) +Achieved 222.79613500923602 GFLOPs +Completed in 0.0018397930000000002 s (achieved 41.96124455305569 GB/s) +Achieved 222.63374194814307 GFLOPs +Completed in 0.001838865 s (achieved 41.982420677972556 GB/s) +Achieved 222.7460960973209 GFLOPs +Completed in 0.001840292 s (achieved 41.949866651596594 GB/s) +Achieved 222.57337422539467 GFLOPs +Completed in 0.0018394750000000001 s (achieved 41.96849862053031 GB/s) +Achieved 222.67222984819037 GFLOPs +Completed in 0.0018388330000000002 s (achieved 41.983151270398125 GB/s) +Achieved 222.74997240097386 GFLOPs +Completed in 0.0018380620000000001 s (achieved 42.00076167180433 GB/s) +Achieved 222.84340789374895 GFLOPs +Completed in 0.0018383990000000001 s (achieved 41.99306244183118 GB/s) +Achieved 222.80255809538625 GFLOPs +Completed in 0.001839399 s (achieved 41.97023266838788 GB/s) +Achieved 222.6814301845331 GFLOPs +Durations: [0.001838734, 0.0018384520000000002, 0.0018397930000000002, 0.001838865, 0.001840292, 0.0018394750000000001, 0.0018388330000000002, 0.0018380620000000001, 0.0018383990000000001, 0.001839399] +Median duration 0.001838865 (41.982420677972556 GB/s) 9.20667120130977% of peak. +Median achieved 222.7460960973209 GFLOPs +./sycl_spmm 100000 100000 128 32 j 16 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 128 columns. +Using method j with WG size 16 +OK! +Completed in 0.0035152200000000003 s (achieved 36.526875700525146 GB/s) +Achieved 233.04373552722163 GFLOPs +Completed in 0.003512709 s (achieved 36.55298631341224 GB/s) +Achieved 233.21032285908112 GFLOPs +Completed in 0.0035154310000000003 s (achieved 36.524683317635876 GB/s) +Achieved 233.02974798822675 GFLOPs +Completed in 0.0035157710000000004 s (achieved 36.52115112161742 GB/s) +Achieved 233.00721235825654 GFLOPs +Completed in 0.0035142470000000003 s (achieved 36.53698900504148 GB/s) +Achieved 233.1082590381382 GFLOPs +Completed in 0.0035161740000000004 s (achieved 36.51696531514083 GB/s) +Achieved 232.98050665296995 GFLOPs +Completed in 0.003516837 s (achieved 36.51008107569387 GB/s) +Achieved 232.93658477774204 GFLOPs +Completed in 0.003515077 s (achieved 36.528361683115335 GB/s) +Achieved 233.05321618843627 GFLOPs +Completed in 0.0035157200000000004 s (achieved 36.52168090746704 GB/s) +Achieved 233.01059242488023 GFLOPs +Completed in 0.003515334 s (achieved 36.52569115765387 GB/s) +Achieved 233.036178070135 GFLOPs +Durations: [0.0035152200000000003, 0.003512709, 0.0035154310000000003, 0.0035157710000000004, 0.0035142470000000003, 0.0035161740000000004, 0.003516837, 0.003515077, 0.0035157200000000004, 0.003515334] +Median duration 0.0035154310000000003 (36.524683317635876 GB/s) 8.009798973165761% of peak. +Median achieved 233.02974798822675 GFLOPs +./sycl_spmm 100000 100000 256 32 j 16 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 256 columns. +Using method j with WG size 16 +OK! +Completed in 0.006997356000000001 s (achieved 32.98388762841279 GB/s) +Achieved 234.1455829887746 GFLOPs +Completed in 0.0069961780000000005 s (achieved 32.989441377849445 GB/s) +Achieved 234.18500787144066 GFLOPs +Completed in 0.0069939040000000004 s (achieved 33.00016757450488 GB/s) +Achieved 234.26115085365768 GFLOPs +Completed in 0.0069949800000000005 s (achieved 32.995091336930194 GB/s) +Achieved 234.2251157258491 GFLOPs +Completed in 0.006994012000000001 s (achieved 32.99965799315185 GB/s) +Achieved 234.25753344432349 GFLOPs +Completed in 0.006992847000000001 s (achieved 33.00515569695719 GB/s) +Achieved 234.296560471007 GFLOPs +Completed in 0.006997228 s (achieved 32.984491001293655 GB/s) +Achieved 234.1498662041597 GFLOPs +Completed in 0.006993396000000001 s (achieved 33.00256470533057 GB/s) +Achieved 234.27816757409417 GFLOPs +Completed in 0.006996204000000001 s (achieved 32.98931877915509 GB/s) +Achieved 234.1841375694591 GFLOPs +Completed in 0.0069950030000000005 s (achieved 32.99498284704095 GB/s) +Achieved 234.22434557926564 GFLOPs +Durations: [0.006997356000000001, 0.0069961780000000005, 0.0069939040000000004, 0.0069949800000000005, 0.006994012000000001, 0.006992847000000001, 0.006997228, 0.006993396000000001, 0.006996204000000001, 0.0069950030000000005] +Median duration 0.0069950030000000005 (32.99498284704095 GB/s) 7.235741852421261% of peak. +Median achieved 234.22434557926564 GFLOPs +./sycl_spmm 100000 100000 512 32 j 16 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 512 columns. +Using method j with WG size 16 +OK! +Completed in 0.014000538000000002 s (achieved 31.113090368384416 GB/s) +Achieved 234.04814872114198 GFLOPs +Completed in 0.013992663 s (achieved 31.130600658359313 GB/s) +Achieved 234.1798698360705 GFLOPs +Completed in 0.0139938 s (achieved 31.128071288713574 GB/s) +Achieved 234.1608426588918 GFLOPs +Completed in 0.013992378000000001 s (achieved 31.13123473365285 GB/s) +Achieved 234.1846396659667 GFLOPs +Completed in 0.013992893000000001 s (achieved 31.130088967306474 GB/s) +Achieved 234.17602064133555 GFLOPs +Completed in 0.013993471 s (achieved 31.128803139692792 GB/s) +Achieved 234.1663480061523 GFLOPs +Completed in 0.013995416 s (achieved 31.12447704305467 GB/s) +Achieved 234.13380495442223 GFLOPs +Completed in 0.013988908000000001 s (achieved 31.138956950749836 GB/s) +Achieved 234.2427300258176 GFLOPs +Completed in 0.013995822000000002 s (achieved 31.123574163775448 GB/s) +Achieved 234.12701304717936 GFLOPs +Completed in 0.013993221 s (achieved 31.129359280468737 GB/s) +Achieved 234.1705315738242 GFLOPs +Durations: [0.014000538000000002, 0.013992663, 0.0139938, 0.013992378000000001, 0.013992893000000001, 0.013993471, 0.013995416, 0.013988908000000001, 0.013995822000000002, 0.013993221] +Median duration 0.013993471 (31.128803139692792 GB/s) 6.826491916599296% of peak. +Median achieved 234.1663480061523 GFLOPs +./sycl_spmm 100000 100000 1 64 j 16 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 1 columns. +Using method j with WG size 16 +OK! +Completed in 0.00452093 s (achieved 11.59053646041854 GB/s) +Achieved 2.8312758658063717 GFLOPs +Completed in 0.004561929 s (achieved 11.486369910623335 GB/s) +Achieved 2.8058306036766463 GFLOPs +Completed in 0.004497557 s (achieved 11.650770407134361 GB/s) +Achieved 2.8459895005221725 GFLOPs +Completed in 0.004593662 s (achieved 11.407022110028992 GB/s) +Achieved 2.7864479363087664 GFLOPs +Completed in 0.004552553 s (achieved 11.510026132589779 GB/s) +Achieved 2.8116092223418376 GFLOPs +Completed in 0.00453267 s (achieved 11.560515987265784 GB/s) +Achieved 2.8239426210158696 GFLOPs +Completed in 0.004565212 s (achieved 11.478109669386658 GB/s) +Achieved 2.803812834978967 GFLOPs +Completed in 0.004560568 s (achieved 11.48979776203315 GB/s) +Achieved 2.80666794136169 GFLOPs +Completed in 0.00452899 s (achieved 11.569909405849868 GB/s) +Achieved 2.826237196372701 GFLOPs +Completed in 0.004573718 s (achieved 11.456763184787519 GB/s) +Achieved 2.7985984269253157 GFLOPs +Durations: [0.00452093, 0.004561929, 0.004497557, 0.004593662, 0.004552553, 0.00453267, 0.004565212, 0.004560568, 0.00452899, 0.004573718] +Median duration 0.004560568 (11.48979776203315 GB/s) 2.5196924916739363% of peak. +Median achieved 2.80666794136169 GFLOPs +./sycl_spmm 100000 100000 8 64 j 16 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 8 columns. +Using method j with WG size 16 +OK! +Completed in 0.001157238 s (achieved 50.11933932345809 GB/s) +Achieved 88.48655159958453 GFLOPs +Completed in 0.001158092 s (achieved 50.08238032902394 GB/s) +Achieved 88.42129986218711 GFLOPs +Completed in 0.00115699 s (achieved 50.13008236890552 GB/s) +Achieved 88.50551863023881 GFLOPs +Completed in 0.001158149 s (achieved 50.0799154512934 GB/s) +Achieved 88.41694807835607 GFLOPs +Completed in 0.0011571720000000001 s (achieved 50.12219791007732 GB/s) +Achieved 88.49159848319869 GFLOPs +Completed in 0.001156371 s (achieved 50.156916768061464 GB/s) +Achieved 88.55289522134332 GFLOPs +Completed in 0.0011572940000000001 s (achieved 50.11691411171232 GB/s) +Achieved 88.48226984672866 GFLOPs +Completed in 0.0011572350000000002 s (achieved 50.119469252139794 GB/s) +Achieved 88.48678099089639 GFLOPs +Completed in 0.0011578690000000002 s (achieved 50.09202595457689 GB/s) +Achieved 88.43832937923028 GFLOPs +Completed in 0.001158304 s (achieved 50.0732139403818 GB/s) +Achieved 88.40511644611432 GFLOPs +Durations: [0.001157238, 0.001158092, 0.00115699, 0.001158149, 0.0011571720000000001, 0.001156371, 0.0011572940000000001, 0.0011572350000000002, 0.0011578690000000002, 0.001158304] +Median duration 0.0011572940000000001 (50.11691411171232 GB/s) 10.990551340287789% of peak. +Median achieved 88.48226984672866 GFLOPs +./sycl_spmm 100000 100000 32 64 j 16 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 32 columns. +Using method j with WG size 16 +OK! +Completed in 0.001983854 s (achieved 38.91415598123652 GB/s) +Achieved 206.4668065291095 GFLOPs +Completed in 0.0019831550000000003 s (achieved 38.9278720019363 GB/s) +Achieved 206.53957960925896 GFLOPs +Completed in 0.0019873900000000003 s (achieved 38.8449192156547 GB/s) +Achieved 206.09945707686964 GFLOPs +Completed in 0.001985221 s (achieved 38.88736014781226 GB/s) +Achieved 206.3246358969606 GFLOPs +Completed in 0.0019840870000000003 s (achieved 38.90958612197953 GB/s) +Achieved 206.44256023047376 GFLOPs +Completed in 0.001982835 s (achieved 38.93415437996606 GB/s) +Achieved 206.57291201738926 GFLOPs +Completed in 0.001983126 s (achieved 38.92844125890135 GB/s) +Achieved 206.542599915487 GFLOPs +Completed in 0.001984081 s (achieved 38.90970378729498 GB/s) +Achieved 206.44318452724463 GFLOPs +Completed in 0.0019847750000000003 s (achieved 38.896098550213495 GB/s) +Achieved 206.37099923165093 GFLOPs +Completed in 0.001982722 s (achieved 38.93637332919088 GB/s) +Achieved 206.58468509453166 GFLOPs +Durations: [0.001983854, 0.0019831550000000003, 0.0019873900000000003, 0.001985221, 0.0019840870000000003, 0.001982835, 0.001983126, 0.001984081, 0.0019847750000000003, 0.001982722] +Median duration 0.001984081 (38.90970378729498 GB/s) 8.532829777915564% of peak. +Median achieved 206.44318452724463 GFLOPs +./sycl_spmm 100000 100000 64 64 j 16 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 64 columns. +Using method j with WG size 16 +OK! +Completed in 0.003623971 s (achieved 28.366674015879266 GB/s) +Achieved 226.05037402341245 GFLOPs +Completed in 0.003624808 s (achieved 28.360123901734934 GB/s) +Achieved 225.9981770068925 GFLOPs +Completed in 0.0036232760000000004 s (achieved 28.37211517974341 GB/s) +Achieved 226.09373395788782 GFLOPs +Completed in 0.003623551 s (achieved 28.369961951687724 GB/s) +Achieved 226.07657516066422 GFLOPs +Completed in 0.003623961 s (achieved 28.36675229120843 GB/s) +Achieved 226.05099778943537 GFLOPs +Completed in 0.0036225830000000004 s (achieved 28.377542764375583 GB/s) +Achieved 226.13698568121143 GFLOPs +Completed in 0.00362546 s (achieved 28.35502363837968 GB/s) +Achieved 225.95753366469359 GFLOPs +Completed in 0.003624992 s (achieved 28.35868437778621 GB/s) +Achieved 225.9867056258331 GFLOPs +Completed in 0.003624383 s (achieved 28.36344944780946 GB/s) +Achieved 226.02467785551363 GFLOPs +Completed in 0.003623995 s (achieved 28.36648615685176 GB/s) +Achieved 226.0488769990025 GFLOPs +Durations: [0.003623971, 0.003624808, 0.0036232760000000004, 0.003623551, 0.003623961, 0.0036225830000000004, 0.00362546, 0.003624992, 0.003624383, 0.003623995] +Median duration 0.003623995 (28.36648615685176 GB/s) 6.220720648432403% of peak. +Median achieved 226.0488769990025 GFLOPs +./sycl_spmm 100000 100000 128 64 j 16 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 128 columns. +Using method j with WG size 16 +OK! +Completed in 0.006965362 s (achieved 22.109404220484162 GB/s) +Achieved 235.22108398673322 GFLOPs +Completed in 0.006967489 s (achieved 22.102654772759596 GB/s) +Achieved 235.1492768772222 GFLOPs +Completed in 0.006965331000000001 s (achieved 22.109502620909183 GB/s) +Achieved 235.22213086499406 GFLOPs +Completed in 0.006964509000000001 s (achieved 22.112112138845678 GB/s) +Achieved 235.2498934239298 GFLOPs +Completed in 0.006965642 s (achieved 22.10851548213359 GB/s) +Achieved 235.21162873429327 GFLOPs +Completed in 0.006964973 s (achieved 22.110639050574925 GB/s) +Achieved 235.23422129561737 GFLOPs +Completed in 0.006965342 s (achieved 22.10946770452908 GB/s) +Achieved 235.22175939099617 GFLOPs +Completed in 0.006964452 s (achieved 22.112293113657756 GB/s) +Achieved 235.2518188078545 GFLOPs +Completed in 0.0069657700000000005 s (achieved 22.10810922554147 GB/s) +Achieved 235.20730658635011 GFLOPs +Completed in 0.006966323000000001 s (achieved 22.106354241685317 GB/s) +Achieved 235.18863538196547 GFLOPs +Durations: [0.006965362, 0.006967489, 0.006965331000000001, 0.006964509000000001, 0.006965642, 0.006964973, 0.006965342, 0.006964452, 0.0069657700000000005, 0.006966323000000001] +Median duration 0.006965362 (22.109404220484162 GB/s) 4.8485535571237195% of peak. +Median achieved 235.22108398673322 GFLOPs +./sycl_spmm 100000 100000 256 64 j 16 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 256 columns. +Using method j with WG size 16 +OK! +Completed in 0.013888541 s (achieved 18.461262705708254 GB/s) +Achieved 235.93550971264727 GFLOPs +Completed in 0.013881773 s (achieved 18.470263416639938 GB/s) +Achieved 236.05053907739307 GFLOPs +Completed in 0.013881396 s (achieved 18.470765044092108 GB/s) +Achieved 236.0569498917832 GFLOPs +Completed in 0.013874500000000001 s (achieved 18.479945511549964 GB/s) +Achieved 236.1742765505063 GFLOPs +Completed in 0.013877333 s (achieved 18.476172907287012 GB/s) +Achieved 236.126062551068 GFLOPs +Completed in 0.013878416000000001 s (achieved 18.47473112205312 GB/s) +Achieved 236.10763649108083 GFLOPs +Completed in 0.013887067000000001 s (achieved 18.46322221963788 GB/s) +Achieved 235.96055236141655 GFLOPs +Completed in 0.013882252000000001 s (achieved 18.469626109654254 GB/s) +Achieved 236.04239427435837 GFLOPs +Completed in 0.01387701 s (achieved 18.476602956977043 GB/s) +Achieved 236.13155859943893 GFLOPs +Completed in 0.013885425000000002 s (achieved 18.4654055601467 GB/s) +Achieved 235.98845552080687 GFLOPs +Durations: [0.013888541, 0.013881773, 0.013881396, 0.013874500000000001, 0.013877333, 0.013878416000000001, 0.013887067000000001, 0.013882252000000001, 0.01387701, 0.013885425000000002] +Median duration 0.013881773 (18.470263416639938 GB/s) 4.050496363298231% of peak. +Median achieved 236.05053907739307 GFLOPs +./sycl_spmm 100000 100000 512 64 j 16 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 512 columns. +Using method j with WG size 16 +OK! +Completed in 0.027840752000000003 s (achieved 16.5656446348863 GB/s) +Achieved 235.39594045448197 GFLOPs +Completed in 0.027835227 s (achieved 16.568932741234697 GB/s) +Achieved 235.4426640745556 GFLOPs +Completed in 0.027839369000000003 s (achieved 16.566467580497246 GB/s) +Achieved 235.4076344187255 GFLOPs +Completed in 0.027836781 s (achieved 16.568007773599973 GB/s) +Achieved 235.42952038886966 GFLOPs +Completed in 0.027837842 s (achieved 16.56737630740199 GB/s) +Achieved 235.42054732547157 GFLOPs +Completed in 0.027836624 s (achieved 16.568101218021265 GB/s) +Achieved 235.43084822354896 GFLOPs +Completed in 0.02783849 s (achieved 16.56699066651963 GB/s) +Achieved 235.41506741206152 GFLOPs +Completed in 0.027838196000000003 s (achieved 16.56716563099132 GB/s) +Achieved 235.4175536374555 GFLOPs +Completed in 0.027840034000000003 s (achieved 16.566071866147865 GB/s) +Achieved 235.40201136248612 GFLOPs +Completed in 0.027834594 s (achieved 16.56930954336895 GB/s) +Achieved 235.44801839035267 GFLOPs +Durations: [0.027840752000000003, 0.027835227, 0.027839369000000003, 0.027836781, 0.027837842, 0.027836624, 0.02783849, 0.027838196000000003, 0.027840034000000003, 0.027834594] +Median duration 0.027838196000000003 (16.56716563099132 GB/s) 3.6331503576735344% of peak. +Median achieved 235.4175536374555 GFLOPs +./sycl_spmm 100000 100000 1 128 j 16 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 1 columns. +Using method j with WG size 16 +OK! +Completed in 0.008984242 s (achieved 11.53130158337231 GB/s) +Achieved 2.8494334858744903 GFLOPs +Completed in 0.009067614 s (achieved 11.425277255957301 GB/s) +Achieved 2.823234425285417 GFLOPs +Completed in 0.009000246 s (achieved 11.510796927106215 GB/s) +Achieved 2.8443666984213545 GFLOPs +Completed in 0.009121325000000001 s (achieved 11.357999413462407 GB/s) +Achieved 2.8066097853107963 GFLOPs +Completed in 0.008975738 s (achieved 11.542226834161157 GB/s) +Achieved 2.8521331616408587 GFLOPs +Completed in 0.009013990000000001 s (achieved 11.493245943250436 GB/s) +Achieved 2.840029775937182 GFLOPs +Completed in 0.008989999 s (achieved 11.523917188422379 GB/s) +Achieved 2.8476087705905195 GFLOPs +Completed in 0.008985488 s (achieved 11.529702560395164 GB/s) +Achieved 2.8490383605208756 GFLOPs +Completed in 0.009027223000000001 s (achieved 11.476398001910443 GB/s) +Achieved 2.8358665782378476 GFLOPs +Completed in 0.009016548000000001 s (achieved 11.489985302579212 GB/s) +Achieved 2.8392240578101506 GFLOPs +Durations: [0.008984242, 0.009067614, 0.009000246, 0.009121325000000001, 0.008975738, 0.009013990000000001, 0.008989999, 0.008985488, 0.009027223000000001, 0.009016548000000001] +Median duration 0.009013990000000001 (11.493245943250436 GB/s) 2.5204486717654464% of peak. +Median achieved 2.840029775937182 GFLOPs +./sycl_spmm 100000 100000 8 128 j 16 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 8 columns. +Using method j with WG size 16 +OK! +Completed in 0.002279375 s (achieved 47.90787123663285 GB/s) +Achieved 89.84919111598575 GFLOPs +Completed in 0.002279783 s (achieved 47.899297433132894 GB/s) +Achieved 89.83311130927812 GFLOPs +Completed in 0.002280523 s (achieved 47.883754735207674 GB/s) +Achieved 89.80396163511615 GFLOPs +Completed in 0.0022803840000000003 s (achieved 47.88667347253795 GB/s) +Achieved 89.80943560382812 GFLOPs +Completed in 0.0022809650000000002 s (achieved 47.87447593452771 GB/s) +Achieved 89.78655963594355 GFLOPs +Completed in 0.0022821760000000003 s (achieved 47.8490721136319 GB/s) +Achieved 89.73891584172299 GFLOPs +Completed in 0.0022825830000000004 s (achieved 47.840540300177466 GB/s) +Achieved 89.72291478557405 GFLOPs +Completed in 0.002281513 s (achieved 47.86297689296532 GB/s) +Achieved 89.76499366867513 GFLOPs +Completed in 0.002282117 s (achieved 47.850309164692256 GB/s) +Achieved 89.74123587879149 GFLOPs +Completed in 0.00228215 s (achieved 47.849617246894375 GB/s) +Achieved 89.73993821615582 GFLOPs +Durations: [0.002279375, 0.002279783, 0.002280523, 0.0022803840000000003, 0.0022809650000000002, 0.0022821760000000003, 0.0022825830000000004, 0.002281513, 0.002282117, 0.00228215] +Median duration 0.002281513 (47.86297689296532 GB/s) 10.496266862492394% of peak. +Median achieved 89.76499366867513 GFLOPs +./sycl_spmm 100000 100000 32 128 j 16 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 32 columns. +Using method j with WG size 16 +OK! +Completed in 0.0039325060000000005 s (achieved 32.650936578354866 GB/s) +Achieved 208.31500320660666 GFLOPs +Completed in 0.0039331 s (achieved 32.64600544100074 GB/s) +Achieved 208.28354224403142 GFLOPs +Completed in 0.003933845 s (achieved 32.63982287049948 GB/s) +Achieved 208.2440970602553 GFLOPs +Completed in 0.003932147 s (achieved 32.65391756717132 GB/s) +Achieved 208.3340221003945 GFLOPs +Completed in 0.0039322870000000005 s (achieved 32.6527549998258 GB/s) +Achieved 208.32660484852707 GFLOPs +Completed in 0.003931154 s (achieved 32.66216586783423 GB/s) +Achieved 208.3866467708973 GFLOPs +Completed in 0.003929671 s (achieved 32.674492088523444 GB/s) +Achieved 208.4652888244334 GFLOPs +Completed in 0.003932914 s (achieved 32.647549374331604 GB/s) +Achieved 208.29339263457072 GFLOPs +Completed in 0.003931973 s (achieved 32.65536258768817 GB/s) +Achieved 208.34324142103722 GFLOPs +Completed in 0.003932245 s (achieved 32.653103761337356 GB/s) +Achieved 208.32882996863117 GFLOPs +Durations: [0.0039325060000000005, 0.0039331, 0.003933845, 0.003932147, 0.0039322870000000005, 0.003931154, 0.003929671, 0.003932914, 0.003931973, 0.003932245] +Median duration 0.0039322870000000005 (32.6527549998258 GB/s) 7.160691885926711% of peak. +Median achieved 208.32660484852707 GFLOPs +./sycl_spmm 100000 100000 64 128 j 16 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 64 columns. +Using method j with WG size 16 +OK! +Completed in 0.007211149 s (achieved 21.355820549540717 GB/s) +Achieved 227.20373688021147 GFLOPs +Completed in 0.007211406 s (achieved 21.355059471065697 GB/s) +Achieved 227.19563979617845 GFLOPs +Completed in 0.007208454 s (achieved 21.363804777002112 GB/s) +Achieved 227.28868076289314 GFLOPs +Completed in 0.007212855000000001 s (achieved 21.350769424867128 GB/s) +Achieved 227.149998163002 GFLOPs +Completed in 0.007219078 s (achieved 21.33236460390094 GB/s) +Achieved 226.95418999489962 GFLOPs +Completed in 0.007213186000000001 s (achieved 21.349789676850143 GB/s) +Achieved 227.13957466229206 GFLOPs +Completed in 0.007209328 s (achieved 21.361214803931794 GB/s) +Achieved 227.26112614102175 GFLOPs +Completed in 0.007212469000000001 s (achieved 21.351912084474815 GB/s) +Achieved 227.16215487373324 GFLOPs +Completed in 0.0072096880000000006 s (achieved 21.360148178395512 GB/s) +Achieved 227.24977835379283 GFLOPs +Completed in 0.0072099600000000005 s (achieved 21.359342354187817 GB/s) +Achieved 227.24120522166558 GFLOPs +Durations: [0.007211149, 0.007211406, 0.007208454, 0.007212855000000001, 0.007219078, 0.007213186000000001, 0.007209328, 0.007212469000000001, 0.0072096880000000006, 0.0072099600000000005] +Median duration 0.007211406 (21.355059471065697 GB/s) 4.683127076988091% of peak. +Median achieved 227.19563979617845 GFLOPs +./sycl_spmm 100000 100000 128 128 j 16 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 128 columns. +Using method j with WG size 16 +OK! +Completed in 0.013884439 s (achieved 14.7791354047506 GB/s) +Achieved 236.0052141825824 GFLOPs +Completed in 0.013883954 s (achieved 14.77965167559616 GB/s) +Achieved 236.01345841393598 GFLOPs +Completed in 0.013891823000000001 s (achieved 14.771279766521644 GB/s) +Achieved 235.8797689835236 GFLOPs +Completed in 0.013882707000000001 s (achieved 14.780979242737025 GB/s) +Achieved 236.03465808217373 GFLOPs +Completed in 0.013883626000000001 s (achieved 14.780000844159876 GB/s) +Achieved 236.01903422059914 GFLOPs +Completed in 0.01388581 s (achieved 14.77767620326074 GB/s) +Achieved 235.98191247035643 GFLOPs +Completed in 0.013879876000000001 s (achieved 14.783994035681587 GB/s) +Achieved 236.0828007397184 GFLOPs +Completed in 0.013884049 s (achieved 14.779550547538403 GB/s) +Achieved 236.0118435191348 GFLOPs +Completed in 0.013881054 s (achieved 14.782739408693319 GB/s) +Achieved 236.06276583896295 GFLOPs +Completed in 0.013882898000000001 s (achieved 14.7807758869942 GB/s) +Achieved 236.03141073283115 GFLOPs +Durations: [0.013884439, 0.013883954, 0.013891823000000001, 0.013882707000000001, 0.013883626000000001, 0.01388581, 0.013879876000000001, 0.013884049, 0.013881054, 0.013882898000000001] +Median duration 0.013883954 (14.77965167559616 GB/s) 3.2411516832447718% of peak. +Median achieved 236.01345841393598 GFLOPs +./sycl_spmm 100000 100000 256 128 j 16 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 256 columns. +Using method j with WG size 16 +OK! +Completed in 0.027681118 s (achieved 11.112268081079675 GB/s) +Achieved 236.75344326771773 GFLOPs +Completed in 0.027674236 s (achieved 11.115031468258058 GB/s) +Achieved 236.81231886582162 GFLOPs +Completed in 0.027689331 s (achieved 11.1089720441422 GB/s) +Achieved 236.68321925148715 GFLOPs +Completed in 0.027679694 s (achieved 11.112839758994447 GB/s) +Achieved 236.76562320378252 GFLOPs +Completed in 0.027682807 s (achieved 11.111590092724342 GB/s) +Achieved 236.73899832484474 GFLOPs +Completed in 0.027683850000000003 s (achieved 11.111171459172045 GB/s) +Achieved 236.730079089433 GFLOPs +Completed in 0.027689933000000003 s (achieved 11.108730526722473 GB/s) +Achieved 236.67807358002634 GFLOPs +Completed in 0.027677914 s (achieved 11.113554439109826 GB/s) +Achieved 236.78084988630286 GFLOPs +Completed in 0.027674877 s (achieved 11.114774024108582 GB/s) +Achieved 236.80683386596442 GFLOPs +Completed in 0.027676677 s (achieved 11.114051155780011 GB/s) +Achieved 236.79143272871957 GFLOPs +Durations: [0.027681118, 0.027674236, 0.027689331, 0.027679694, 0.027682807, 0.027683850000000003, 0.027689933000000003, 0.027677914, 0.027674877, 0.027676677] +Median duration 0.027681118 (11.112268081079675 GB/s) 2.4369008949736126% of peak. +Median achieved 236.75344326771773 GFLOPs +./sycl_spmm 100000 100000 512 128 j 16 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 512 columns. +Using method j with WG size 16 +OK! +Completed in 0.055548107000000006 s (achieved 9.22443682914343 GB/s) +Achieved 235.96123626679122 GFLOPs +Completed in 0.055566608 s (achieved 9.221365536654677 GB/s) +Achieved 235.8826725575907 GFLOPs +Completed in 0.055574883000000005 s (achieved 9.219992491932013 GB/s) +Achieved 235.84755005242206 GFLOPs +Completed in 0.055558351000000006 s (achieved 9.222736002369833 GB/s) +Achieved 235.91772909170754 GFLOPs +Completed in 0.055544137 s (achieved 9.225096142910639 GB/s) +Achieved 235.97810152311845 GFLOPs +Completed in 0.055557384 s (achieved 9.222896528029471 GB/s) +Achieved 235.92183534055528 GFLOPs +Completed in 0.055551307 s (achieved 9.223905460946222 GB/s) +Achieved 235.94764386011656 GFLOPs +Completed in 0.055546573 s (achieved 9.224691575482074 GB/s) +Achieved 235.96775268206017 GFLOPs +Completed in 0.055529231000000005 s (achieved 9.22757248340068 GB/s) +Achieved 236.04144635102185 GFLOPs +Completed in 0.055560356000000005 s (achieved 9.222403182585799 GB/s) +Achieved 235.90921555650218 GFLOPs +Durations: [0.055548107000000006, 0.055566608, 0.055574883000000005, 0.055558351000000006, 0.055544137, 0.055557384, 0.055551307, 0.055546573, 0.055529231000000005, 0.055560356000000005] +Median duration 0.055557384 (9.222896528029471 GB/s) 2.0225650280766385% of peak. +Median achieved 235.92183534055528 GFLOPs +./sycl_spmm 100000 100000 1 16 j 32 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 1 columns. +Using method j with WG size 32 +OK! +Completed in 0.001108967 s (achieved 12.624364836825624 GB/s) +Achieved 2.885568281112062 GFLOPs +Completed in 0.001106489 s (achieved 12.652637305928936 GB/s) +Achieved 2.8920305579178827 GFLOPs +Completed in 0.001111197 s (achieved 12.599029695004578 GB/s) +Achieved 2.8797773932075055 GFLOPs +Completed in 0.00110642 s (achieved 12.653426366117749 GB/s) +Achieved 2.8922109144809385 GFLOPs +Completed in 0.001108793 s (achieved 12.626345945546193 GB/s) +Achieved 2.8860211058330996 GFLOPs +Completed in 0.001112272 s (achieved 12.58685285613591 GB/s) +Achieved 2.8769941165470314 GFLOPs +Completed in 0.001107949 s (achieved 12.635964290775119 GB/s) +Achieved 2.8882195841144314 GFLOPs +Completed in 0.001110422 s (achieved 12.607822971807115 GB/s) +Achieved 2.881787284473831 GFLOPs +Completed in 0.0011104840000000001 s (achieved 12.607119057996334 GB/s) +Achieved 2.8816263899344787 GFLOPs +Completed in 0.0011068270000000001 s (achieved 12.648773475890993 GB/s) +Achieved 2.8911473970186847 GFLOPs +Durations: [0.001108967, 0.001106489, 0.001111197, 0.00110642, 0.001108793, 0.001112272, 0.001107949, 0.001110422, 0.0011104840000000001, 0.0011068270000000001] +Median duration 0.001108967 (12.624364836825624 GB/s) 2.7685010607073735% of peak. +Median achieved 2.885568281112062 GFLOPs +./sycl_spmm 100000 100000 8 16 j 32 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 8 columns. +Using method j with WG size 32 +OK! +Completed in 0.000317531 s (achieved 61.72626924615234 GB/s) +Achieved 80.62204950067867 GFLOPs +Completed in 0.000319153 s (achieved 61.412563880019924 GB/s) +Achieved 80.21231196322766 GFLOPs +Completed in 0.000317884 s (achieved 61.65772420128097 GB/s) +Achieved 80.53252129707693 GFLOPs +Completed in 0.000318972 s (achieved 61.44741231205247 GB/s) +Achieved 80.25782827332807 GFLOPs +Completed in 0.00031662300000000003 s (achieved 61.903285610963195 GB/s) +Achieved 80.85325450141019 GFLOPs +Completed in 0.000317316 s (achieved 61.76809237479358 GB/s) +Achieved 80.67667561673537 GFLOPs +Completed in 0.00031691 s (achieved 61.84722476412862 GB/s) +Achieved 80.78003218579408 GFLOPs +Completed in 0.000317426 s (achieved 61.74668741690977 GB/s) +Achieved 80.64871812642947 GFLOPs +Completed in 0.00031825500000000004 s (achieved 61.58584782642849 GB/s) +Achieved 80.43864196948986 GFLOPs +Completed in 0.000317788 s (achieved 61.676350271250016 GB/s) +Achieved 80.55684922023488 GFLOPs +Durations: [0.000317531, 0.000319153, 0.000317884, 0.000318972, 0.00031662300000000003, 0.000317316, 0.00031691, 0.000317426, 0.00031825500000000004, 0.000317788] +Median duration 0.000317788 (61.676350271250016 GB/s) 13.525515410361844% of peak. +Median achieved 80.55684922023488 GFLOPs +./sycl_spmm 100000 100000 32 16 j 32 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 32 columns. +Using method j with WG size 32 +OK! +Completed in 0.000365822 s (achieved 106.06252221025525 GB/s) +Achieved 279.9175555324721 GFLOPs +Completed in 0.00036463 s (achieved 106.4092477305762 GB/s) +Achieved 280.8326248525903 GFLOPs +Completed in 0.000365144 s (achieved 106.25945928181757 GB/s) +Achieved 280.43730692548695 GFLOPs +Completed in 0.00036526800000000003 s (achieved 106.22338666403844 GB/s) +Achieved 280.34210497497725 GFLOPs +Completed in 0.000364674 s (achieved 106.39640884735407 GB/s) +Achieved 280.79874079314675 GFLOPs +Completed in 0.00036465300000000005 s (achieved 106.40253610967137 GB/s) +Achieved 280.81491171058514 GFLOPs +Completed in 0.000363473 s (achieved 106.74796752441034 GB/s) +Achieved 281.72656565962257 GFLOPs +Completed in 0.000364547 s (achieved 106.43347497030561 GB/s) +Achieved 280.89656477765556 GFLOPs +Completed in 0.00036523 s (achieved 106.23443857295402 GB/s) +Achieved 280.37127289653097 GFLOPs +Completed in 0.00036465200000000003 s (achieved 106.40282790167062 GB/s) +Achieved 280.81568180073054 GFLOPs +Durations: [0.000365822, 0.00036463, 0.000365144, 0.00036526800000000003, 0.000364674, 0.00036465300000000005, 0.000363473, 0.000364547, 0.00036523, 0.00036465200000000003] +Median duration 0.000364674 (106.39640884735407 GB/s) 23.332545799858348% of peak. +Median achieved 280.79874079314675 GFLOPs +./sycl_spmm 100000 100000 64 16 j 32 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 64 columns. +Using method j with WG size 32 +OK! +Completed in 0.000614183 s (achieved 104.85474850329626 GB/s) +Achieved 333.45110496383 GFLOPs +Completed in 0.000614154 s (achieved 104.85969968444398 GB/s) +Achieved 333.46685033395534 GFLOPs +Completed in 0.000614377 s (achieved 104.82163883088073 GB/s) +Achieved 333.3458120990857 GFLOPs +Completed in 0.00061331 s (achieved 105.00400123917758 GB/s) +Achieved 333.92574717516425 GFLOPs +Completed in 0.000614436 s (achieved 104.81157354061287 GB/s) +Achieved 333.31380322767546 GFLOPs +Completed in 0.000615065 s (achieved 104.70438734117533 GB/s) +Achieved 332.97293781958007 GFLOPs +Completed in 0.000613673 s (achieved 104.94188924720495 GB/s) +Achieved 333.7282233371845 GFLOPs +Completed in 0.0006153500000000001 s (achieved 104.65589339400341 GB/s) +Achieved 332.8187210530592 GFLOPs +Completed in 0.00061415 s (achieved 104.86038264267688 GB/s) +Achieved 333.4690222258406 GFLOPs +Completed in 0.0006147300000000001 s (achieved 104.76144648870236 GB/s) +Achieved 333.1543929855383 GFLOPs +Durations: [0.000614183, 0.000614154, 0.000614377, 0.00061331, 0.000614436, 0.000615065, 0.000613673, 0.0006153500000000001, 0.00061415, 0.0006147300000000001] +Median duration 0.000614377 (104.82163883088073 GB/s) 22.987201498000157% of peak. +Median achieved 333.3458120990857 GFLOPs +./sycl_spmm 100000 100000 128 16 j 32 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 128 columns. +Using method j with WG size 32 +OK! +Completed in 0.001094801 s (achieved 105.58996931862502 GB/s) +Achieved 374.1319198648887 GFLOPs +Completed in 0.001094944 s (achieved 105.57617923839028 GB/s) +Achieved 374.0830581290002 GFLOPs +Completed in 0.001094624 s (achieved 105.60704314906306 GB/s) +Achieved 374.19241675680416 GFLOPs +Completed in 0.001095141 s (achieved 105.55718761328451 GB/s) +Achieved 374.0157660063864 GFLOPs +Completed in 0.0010944680000000001 s (achieved 105.6220958493076 GB/s) +Achieved 374.2457522741642 GFLOPs +Completed in 0.0010953 s (achieved 105.54186432940747 GB/s) +Achieved 373.9614717429015 GFLOPs +Completed in 0.0010950460000000001 s (achieved 105.56634515810295 GB/s) +Achieved 374.04821349970683 GFLOPs +Completed in 0.0010941990000000001 s (achieved 105.64806218978448 GB/s) +Achieved 374.33775757426207 GFLOPs +Completed in 0.001095141 s (achieved 105.55718761328451 GB/s) +Achieved 374.0157660063864 GFLOPs +Completed in 0.001093391 s (achieved 105.72613456668293 GB/s) +Achieved 374.61438771674545 GFLOPs +Durations: [0.001094801, 0.001094944, 0.001094624, 0.001095141, 0.0010944680000000001, 0.0010953, 0.0010950460000000001, 0.0010941990000000001, 0.001095141, 0.001093391] +Median duration 0.001094944 (105.57617923839028 GB/s) 23.1526708856119% of peak. +Median achieved 374.0830581290002 GFLOPs +./sycl_spmm 100000 100000 256 16 j 32 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 256 columns. +Using method j with WG size 32 +OK! +Completed in 0.002146755 s (achieved 101.54861826337891 GB/s) +Achieved 381.5992043805651 GFLOPs +Completed in 0.0021485040000000003 s (achieved 101.46595212296556 GB/s) +Achieved 381.28856171550063 GFLOPs +Completed in 0.002146518 s (achieved 101.55983038576896 GB/s) +Achieved 381.6413372727366 GFLOPs +Completed in 0.002148035 s (achieved 101.48810610627854 GB/s) +Achieved 381.3718119118171 GFLOPs +Completed in 0.002148178 s (achieved 101.48135024192595 GB/s) +Achieved 381.3464247376148 GFLOPs +Completed in 0.0021485560000000003 s (achieved 101.4634964134051 GB/s) +Achieved 381.2793336547895 GFLOPs +Completed in 0.002145966 s (achieved 101.5859542975052 GB/s) +Achieved 381.73950565852397 GFLOPs +Completed in 0.002146648 s (achieved 101.55367996988794 GB/s) +Achieved 381.61822525164814 GFLOPs +Completed in 0.002147177 s (achieved 101.52866018963505 GB/s) +Achieved 381.5242059690468 GFLOPs +Completed in 0.002146934 s (achieved 101.54015167676324 GB/s) +Achieved 381.56738865749946 GFLOPs +Durations: [0.002146755, 0.0021485040000000003, 0.002146518, 0.002148035, 0.002148178, 0.0021485560000000003, 0.002145966, 0.002146648, 0.002147177, 0.002146934] +Median duration 0.002147177 (101.52866018963505 GB/s) 22.26505705913049% of peak. +Median achieved 381.5242059690468 GFLOPs +./sycl_spmm 100000 100000 512 16 j 32 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 512 columns. +Using method j with WG size 32 +OK! +Completed in 0.004290946 s (achieved 98.53305168603846 GB/s) +Achieved 381.8272241132841 GFLOPs +Completed in 0.00429075 s (achieved 98.53755264231195 GB/s) +Achieved 381.84466585095845 GFLOPs +Completed in 0.004292194 s (achieved 98.50440217753437 GB/s) +Achieved 381.7162038808125 GFLOPs +Completed in 0.004289122 s (achieved 98.57495403488173 GB/s) +Achieved 381.98960066885485 GFLOPs +Completed in 0.00428949 s (achieved 98.56649718264876 GB/s) +Achieved 381.95682936666134 GFLOPs +Completed in 0.004289145 s (achieved 98.57442543910265 GB/s) +Achieved 381.987552297719 GFLOPs +Completed in 0.004290945 s (achieved 98.5330746490575 GB/s) +Achieved 381.8273130976976 GFLOPs +Completed in 0.0042885250000000005 s (achieved 98.58867652631149 GB/s) +Achieved 382.0427769454532 GFLOPs +Completed in 0.004292649 s (achieved 98.49396118806825 GB/s) +Achieved 381.6757438122707 GFLOPs +Completed in 0.004290309 s (achieved 98.54768129754757 GB/s) +Achieved 381.88391558743206 GFLOPs +Durations: [0.004290946, 0.00429075, 0.004292194, 0.004289122, 0.00428949, 0.004289145, 0.004290945, 0.0042885250000000005, 0.004292649, 0.004290309] +Median duration 0.00429075 (98.53755264231195 GB/s) 21.609112421559637% of peak. +Median achieved 381.84466585095845 GFLOPs +./sycl_spmm 100000 100000 1 32 j 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 1 columns. +Using method j with WG size 32 +OK! +Completed in 0.0022795980000000003 s (achieved 11.75646056892487 GB/s) +Achieved 2.807512552651827 GFLOPs +Completed in 0.002289288 s (achieved 11.706698327165478 GB/s) +Achieved 2.7956290340053327 GFLOPs +Completed in 0.0022848 s (achieved 11.729693627450981 GB/s) +Achieved 2.801120448179272 GFLOPs +Completed in 0.002294106 s (achieved 11.682112334826726 GB/s) +Achieved 2.7897577531291056 GFLOPs +Completed in 0.002290575 s (achieved 11.700120712048285 GB/s) +Achieved 2.794058260480447 GFLOPs +Completed in 0.0022921540000000002 s (achieved 11.692060830118743 GB/s) +Achieved 2.7921335128442504 GFLOPs +Completed in 0.0023023030000000003 s (achieved 11.640519948938085 GB/s) +Achieved 2.7798252445486105 GFLOPs +Completed in 0.002293559 s (achieved 11.684898448219558 GB/s) +Achieved 2.790423093541522 GFLOPs +Completed in 0.002301142 s (achieved 11.646392964884393 GB/s) +Achieved 2.7812277556100407 GFLOPs +Completed in 0.0022914650000000003 s (achieved 11.695576410724144 GB/s) +Achieved 2.7929730543560556 GFLOPs +Durations: [0.0022795980000000003, 0.002289288, 0.0022848, 0.002294106, 0.002290575, 0.0022921540000000002, 0.0023023030000000003, 0.002293559, 0.002301142, 0.0022914650000000003] +Median duration 0.0022921540000000002 (11.692060830118743 GB/s) 2.564048427657619% of peak. +Median achieved 2.7921335128442504 GFLOPs +./sycl_spmm 100000 100000 8 32 j 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 8 columns. +Using method j with WG size 32 +OK! +Completed in 0.000603331 s (achieved 53.70187177519471 GB/s) +Achieved 84.86220664941798 GFLOPs +Completed in 0.00059915 s (achieved 54.076615204873576 GB/s) +Achieved 85.45439372444297 GFLOPs +Completed in 0.000599605 s (achieved 54.03558009022607 GB/s) +Achieved 85.38954811917846 GFLOPs +Completed in 0.0005997800000000001 s (achieved 54.01981393177498 GB/s) +Achieved 85.36463369902297 GFLOPs +Completed in 0.0006009520000000001 s (achieved 53.91446238634699 GB/s) +Achieved 85.19815226507275 GFLOPs +Completed in 0.000600508 s (achieved 53.9543253378806 GB/s) +Achieved 85.26114556342297 GFLOPs +Completed in 0.000599152 s (achieved 54.07643469436804 GB/s) +Achieved 85.45410847330893 GFLOPs +Completed in 0.000601092 s (achieved 53.90190519920412 GB/s) +Achieved 85.17830881129677 GFLOPs +Completed in 0.000599344 s (achieved 54.05911129501589 GB/s) +Achieved 85.42673322832964 GFLOPs +Completed in 0.000601706 s (achieved 53.84690197538333 GB/s) +Achieved 85.09139014734771 GFLOPs +Durations: [0.000603331, 0.00059915, 0.000599605, 0.0005997800000000001, 0.0006009520000000001, 0.000600508, 0.000599152, 0.000601092, 0.000599344, 0.000601706] +Median duration 0.000600508 (53.9543253378806 GB/s) 11.832088889886096% of peak. +Median achieved 85.26114556342297 GFLOPs +./sycl_spmm 100000 100000 32 32 j 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 32 columns. +Using method j with WG size 32 +OK! +Completed in 0.0006976640000000001 s (achieved 73.96111021924594 GB/s) +Achieved 293.55105036235204 GFLOPs +Completed in 0.0006986750000000001 s (achieved 73.85408666404265 GB/s) +Achieved 293.12627473431854 GFLOPs +Completed in 0.000698673 s (achieved 73.85429807649645 GB/s) +Achieved 293.12711382864376 GFLOPs +Completed in 0.000697364 s (achieved 73.9929276532772 GB/s) +Achieved 293.67733350158596 GFLOPs +Completed in 0.000696936 s (achieved 74.03836794196312 GB/s) +Achieved 293.85768564114926 GFLOPs +Completed in 0.000696677 s (achieved 74.06589280254695 GB/s) +Achieved 293.96693159096685 GFLOPs +Completed in 0.000697528 s (achieved 73.97553073138283 GB/s) +Achieved 293.60828525879964 GFLOPs +Completed in 0.000696615 s (achieved 74.07248480150442 GB/s) +Achieved 293.9930951817001 GFLOPs +Completed in 0.000698609 s (achieved 73.86106391414941 GB/s) +Achieved 293.1539673837583 GFLOPs +Completed in 0.000698852 s (achieved 73.83538145415625 GB/s) +Achieved 293.0520339070361 GFLOPs +Durations: [0.0006976640000000001, 0.0006986750000000001, 0.000698673, 0.000697364, 0.000696936, 0.000696677, 0.000697528, 0.000696615, 0.000698609, 0.000698852] +Median duration 0.0006976640000000001 (73.96111021924594 GB/s) 16.219541714746917% of peak. +Median achieved 293.55105036235204 GFLOPs +./sycl_spmm 100000 100000 64 32 j 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 64 columns. +Using method j with WG size 32 +OK! +Completed in 0.0011962000000000001 s (achieved 64.53770606921918 GB/s) +Achieved 342.41765591038285 GFLOPs +Completed in 0.00119527 s (achieved 64.58792072084131 GB/s) +Achieved 342.6840797476721 GFLOPs +Completed in 0.0011962840000000001 s (achieved 64.53317439671515 GB/s) +Achieved 342.393612219172 GFLOPs +Completed in 0.0011962910000000002 s (achieved 64.53279678606626 GB/s) +Achieved 342.3916087306516 GFLOPs +Completed in 0.001195621 s (achieved 64.56895956160021 GB/s) +Achieved 342.5834775401235 GFLOPs +Completed in 0.001197953 s (achieved 64.443266138154 GB/s) +Achieved 341.91658604302506 GFLOPs +Completed in 0.001197488 s (achieved 64.46829028766885 GB/s) +Achieved 342.0493566532608 GFLOPs +Completed in 0.001195117 s (achieved 64.59618932707006 GB/s) +Achieved 342.7279504851826 GFLOPs +Completed in 0.001195866 s (achieved 64.55573116051464 GB/s) +Achieved 342.51329162297446 GFLOPs +Completed in 0.001196046 s (achieved 64.5460157886904 GB/s) +Achieved 342.4617447823913 GFLOPs +Durations: [0.0011962000000000001, 0.00119527, 0.0011962840000000001, 0.0011962910000000002, 0.001195621, 0.001197953, 0.001197488, 0.001195117, 0.001195866, 0.001196046] +Median duration 0.0011962000000000001 (64.53770606921918 GB/s) 14.15300571693403% of peak. +Median achieved 342.41765591038285 GFLOPs +./sycl_spmm 100000 100000 128 32 j 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 128 columns. +Using method j with WG size 32 +OK! +Completed in 0.002139621 s (achieved 60.01062991997181 GB/s) +Achieved 382.87154594201496 GFLOPs +Completed in 0.002139626 s (achieved 60.010489683711086 GB/s) +Achieved 382.8706512259619 GFLOPs +Completed in 0.0021398100000000002 s (achieved 60.005329445137654 GB/s) +Achieved 382.83772858337886 GFLOPs +Completed in 0.002142506 s (achieved 59.929822366891855 GB/s) +Achieved 382.3559887346873 GFLOPs +Completed in 0.002141706 s (achieved 59.9522081929079 GB/s) +Achieved 382.49881169497587 GFLOPs +Completed in 0.002139022 s (achieved 60.0274349679433 GB/s) +Achieved 382.97876319177647 GFLOPs +Completed in 0.00214065 s (achieved 59.981783103263034 GB/s) +Achieved 382.687501459837 GFLOPs +Completed in 0.002139928 s (achieved 60.00202062873144 GB/s) +Achieved 382.81661812920805 GFLOPs +Completed in 0.002141557 s (achieved 59.95637940059499 GB/s) +Achieved 382.5254242590788 GFLOPs +Completed in 0.0021410970000000002 s (achieved 59.969260617337746 GB/s) +Achieved 382.60760722190537 GFLOPs +Durations: [0.002139621, 0.002139626, 0.0021398100000000002, 0.002142506, 0.002141706, 0.002139022, 0.00214065, 0.002139928, 0.002141557, 0.0021410970000000002] +Median duration 0.00214065 (59.981783103263034 GB/s) 13.153899803347155% of peak. +Median achieved 382.687501459837 GFLOPs +./sycl_spmm 100000 100000 256 32 j 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 256 columns. +Using method j with WG size 32 +OK! +Completed in 0.004208041 s (achieved 54.847375298862346 GB/s) +Achieved 389.3498185972998 GFLOPs +Completed in 0.004210107 s (achieved 54.82046038259835 GB/s) +Achieved 389.15875534754815 GFLOPs +Completed in 0.00420932 s (achieved 54.83070994840022 GB/s) +Achieved 389.2315148289985 GFLOPs +Completed in 0.0042081950000000005 s (achieved 54.84536814477465 GB/s) +Achieved 389.33557023854644 GFLOPs +Completed in 0.004209252 s (achieved 54.83159573244843 GB/s) +Achieved 389.23780282102376 GFLOPs +Completed in 0.0042087570000000005 s (achieved 54.83804458180883 GB/s) +Achieved 389.2835818271285 GFLOPs +Completed in 0.004209812 s (achieved 54.824301892816116 GB/s) +Achieved 389.1860254092107 GFLOPs +Completed in 0.004210612 s (achieved 54.81388548743033 GB/s) +Achieved 389.1120815691401 GFLOPs +Completed in 0.004207939 s (achieved 54.84870479348679 GB/s) +Achieved 389.3592563960647 GFLOPs +Completed in 0.004208075 s (achieved 54.84693214831009 GB/s) +Achieved 389.346672766051 GFLOPs +Durations: [0.004208041, 0.004210107, 0.00420932, 0.0042081950000000005, 0.004209252, 0.0042087570000000005, 0.004209812, 0.004210612, 0.004207939, 0.004208075] +Median duration 0.004209252 (54.83159573244843 GB/s) 12.024472748343953% of peak. +Median achieved 389.23780282102376 GFLOPs +./sycl_spmm 100000 100000 512 32 j 32 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 512 columns. +Using method j with WG size 32 +OK! +Completed in 0.008470512000000001 s (achieved 51.42546330139193 GB/s) +Achieved 386.84792607577907 GFLOPs +Completed in 0.008471484000000001 s (achieved 51.419562853450465 GB/s) +Achieved 386.8035399700926 GFLOPs +Completed in 0.008467665000000001 s (achieved 51.44275358082777 GB/s) +Achieved 386.9779921619478 GFLOPs +Completed in 0.008468678 s (achieved 51.43660013995101 GB/s) +Achieved 386.9317029175038 GFLOPs +Completed in 0.008468566 s (achieved 51.43728040851308 GB/s) +Achieved 386.9368202361533 GFLOPs +Completed in 0.008468952 s (achieved 51.434935987357115 GB/s) +Achieved 386.9191843335516 GFLOPs +Completed in 0.008468189000000001 s (achieved 51.43957037331122 GB/s) +Achieved 386.9540464909321 GFLOPs +Completed in 0.008467532 s (achieved 51.443561595043285 GB/s) +Achieved 386.98407044697325 GFLOPs +Completed in 0.008472429 s (achieved 51.413827604810855 GB/s) +Achieved 386.7603965757636 GFLOPs +Completed in 0.008472624 s (achieved 51.41264430004212 GB/s) +Achieved 386.75149516843896 GFLOPs +Durations: [0.008470512000000001, 0.008471484000000001, 0.008467665000000001, 0.008468678, 0.008468566, 0.008468952, 0.008468189000000001, 0.008467532, 0.008472429, 0.008472624] +Median duration 0.008468952 (51.434935987357115 GB/s) 11.279591225297612% of peak. +Median achieved 386.9191843335516 GFLOPs +./sycl_spmm 100000 100000 1 64 j 32 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 1 columns. +Using method j with WG size 32 +OK! +Completed in 0.004537057000000001 s (achieved 11.549337819648287 GB/s) +Achieved 2.8212120764627815 GFLOPs +Completed in 0.004554706 s (achieved 11.504585367310206 GB/s) +Achieved 2.810280180542937 GFLOPs +Completed in 0.004575115 s (achieved 11.453264890609306 GB/s) +Achieved 2.7977438818477784 GFLOPs +Completed in 0.004573062 s (achieved 11.458406643076346 GB/s) +Achieved 2.7989998823545363 GFLOPs +Completed in 0.004509226 s (achieved 11.620620478991295 GB/s) +Achieved 2.8386246331410314 GFLOPs +Completed in 0.00455399 s (achieved 11.5063941730219 GB/s) +Achieved 2.8107220261792407 GFLOPs +Completed in 0.004582753 s (achieved 11.434175920020127 GB/s) +Achieved 2.793080927555991 GFLOPs +Completed in 0.0045626320000000005 s (achieved 11.484600116774702 GB/s) +Achieved 2.80539828765502 GFLOPs +Completed in 0.004530551 s (achieved 11.565922997004117 GB/s) +Achieved 2.8252634171870046 GFLOPs +Completed in 0.0045380460000000004 s (achieved 11.54682081230556 GB/s) +Achieved 2.8205972350214163 GFLOPs +Durations: [0.004537057000000001, 0.004554706, 0.004575115, 0.004573062, 0.004509226, 0.00455399, 0.004582753, 0.0045626320000000005, 0.004530551, 0.0045380460000000004] +Median duration 0.004554706 (11.504585367310206 GB/s) 2.5229353875680274% of peak. +Median achieved 2.810280180542937 GFLOPs +./sycl_spmm 100000 100000 8 64 j 32 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 8 columns. +Using method j with WG size 32 +OK! +Completed in 0.0011594630000000001 s (achieved 50.02316072181691 GB/s) +Achieved 88.31674663184594 GFLOPs +Completed in 0.0011583890000000001 s (achieved 50.06953967967582 GB/s) +Achieved 88.39862947593598 GFLOPs +Completed in 0.001157639 s (achieved 50.101978250559974 GB/s) +Achieved 88.45590032816794 GFLOPs +Completed in 0.001159272 s (achieved 50.03140246637545 GB/s) +Achieved 88.33129757295958 GFLOPs +Completed in 0.001159063 s (achieved 50.04042403217081 GB/s) +Achieved 88.34722530181706 GFLOPs +Completed in 0.001158457 s (achieved 50.06660065932529 GB/s) +Achieved 88.3934405851922 GFLOPs +Completed in 0.001157758 s (achieved 50.09682852547769 GB/s) +Achieved 88.44680840037383 GFLOPs +Completed in 0.001158657 s (achieved 50.05795848124165 GB/s) +Achieved 88.37818267183472 GFLOPs +Completed in 0.001157201 s (achieved 50.120941824281175 GB/s) +Achieved 88.48938084222188 GFLOPs +Completed in 0.001157821 s (achieved 50.09410262899014 GB/s) +Achieved 88.44199578345875 GFLOPs +Durations: [0.0011594630000000001, 0.0011583890000000001, 0.001157639, 0.001159272, 0.001159063, 0.001158457, 0.001157758, 0.001158657, 0.001157201, 0.001157821] +Median duration 0.001158457 (50.06660065932529 GB/s) 10.979517688448528% of peak. +Median achieved 88.3934405851922 GFLOPs +./sycl_spmm 100000 100000 32 64 j 32 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 32 columns. +Using method j with WG size 32 +OK! +Completed in 0.001376534 s (achieved 56.0828893438157 GB/s) +Achieved 297.55894151542935 GFLOPs +Completed in 0.001377435 s (achieved 56.04620472109392 GB/s) +Achieved 297.3643039417468 GFLOPs +Completed in 0.001377013 s (achieved 56.06338066525153 GB/s) +Achieved 297.4554343350426 GFLOPs +Completed in 0.001375382 s (achieved 56.12986355790609 GB/s) +Achieved 297.80817256587625 GFLOPs +Completed in 0.001376943 s (achieved 56.06623077353238 GB/s) +Achieved 297.47055615228805 GFLOPs +Completed in 0.001376402 s (achieved 56.08826781710576 GB/s) +Achieved 297.58747807689906 GFLOPs +Completed in 0.001375651 s (achieved 56.11888771207232 GB/s) +Achieved 297.7499380293403 GFLOPs +Completed in 0.001376405 s (achieved 56.088145567619996 GB/s) +Achieved 297.5868294578994 GFLOPs +Completed in 0.0013766120000000001 s (achieved 56.0797116398811 GB/s) +Achieved 297.54208157418356 GFLOPs +Completed in 0.001376613 s (achieved 56.07967090242501 GB/s) +Achieved 297.5418654334951 GFLOPs +Durations: [0.001376534, 0.001377435, 0.001377013, 0.001375382, 0.001376943, 0.001376402, 0.001375651, 0.001376405, 0.0013766120000000001, 0.001376613] +Median duration 0.0013766120000000001 (56.0797116398811 GB/s) 12.298182377166906% of peak. +Median achieved 297.54208157418356 GFLOPs +./sycl_spmm 100000 100000 64 64 j 32 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 64 columns. +Using method j with WG size 32 +OK! +Completed in 0.002376052 s (achieved 43.265048071338505 GB/s) +Achieved 344.7735992309933 GFLOPs +Completed in 0.002373168 s (achieved 43.31762605934346 GB/s) +Achieved 345.19258644984257 GFLOPs +Completed in 0.002374221 s (achieved 43.29841409034795 GB/s) +Achieved 345.039488741781 GFLOPs +Completed in 0.0023754400000000004 s (achieved 43.2761947260297 GB/s) +Achieved 344.8624254874886 GFLOPs +Completed in 0.002375358 s (achieved 43.277688668402824 GB/s) +Achieved 344.87433052196764 GFLOPs +Completed in 0.002374855 s (achieved 43.28685498693605 GB/s) +Achieved 344.9473757345186 GFLOPs +Completed in 0.002375826 s (achieved 43.26916365087342 GB/s) +Achieved 344.80639575457127 GFLOPs +Completed in 0.002376675 s (achieved 43.253706964561836 GB/s) +Achieved 344.68322341085764 GFLOPs +Completed in 0.0023751600000000003 s (achieved 43.281296417925525 GB/s) +Achieved 344.9030802135435 GFLOPs +Completed in 0.002373697 s (achieved 43.3079723317677 GB/s) +Achieved 345.11565713736843 GFLOPs +Durations: [0.002376052, 0.002373168, 0.002374221, 0.0023754400000000004, 0.002375358, 0.002374855, 0.002375826, 0.002376675, 0.0023751600000000003, 0.002373697] +Median duration 0.002375358 (43.277688668402824 GB/s) 9.490721199211146% of peak. +Median achieved 344.87433052196764 GFLOPs +./sycl_spmm 100000 100000 128 64 j 32 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 128 columns. +Using method j with WG size 32 +OK! +Completed in 0.0042349480000000005 s (achieved 36.36408380929352 GB/s) +Achieved 386.87606081585886 GFLOPs +Completed in 0.004235545 s (achieved 36.358958292262265 GB/s) +Achieved 386.8215306412752 GFLOPs +Completed in 0.004235194 s (achieved 36.36197161216227 GB/s) +Achieved 386.8535892334566 GFLOPs +Completed in 0.00423512 s (achieved 36.36260696273069 GB/s) +Achieved 386.8603487032245 GFLOPs +Completed in 0.0042364 s (achieved 36.35162024360306 GB/s) +Achieved 386.7434614295156 GFLOPs +Completed in 0.004235636 s (achieved 36.35817714270065 GB/s) +Achieved 386.81322002173937 GFLOPs +Completed in 0.004237389 s (achieved 36.34313583199465 GB/s) +Achieved 386.65319610731984 GFLOPs +Completed in 0.004236837 s (achieved 36.34787082911143 GB/s) +Achieved 386.7035715558564 GFLOPs +Completed in 0.004236056 s (achieved 36.354572271943525 GB/s) +Achieved 386.7748679432 GFLOPs +Completed in 0.0042372 s (achieved 36.34475691494383 GB/s) +Achieved 386.67044274520913 GFLOPs +Durations: [0.0042349480000000005, 0.004235545, 0.004235194, 0.00423512, 0.0042364, 0.004235636, 0.004237389, 0.004236837, 0.004236056, 0.0042372] +Median duration 0.004236056 (36.354572271943525 GB/s) 7.97249391928586% of peak. +Median achieved 386.7748679432 GFLOPs +./sycl_spmm 100000 100000 256 64 j 32 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 256 columns. +Using method j with WG size 32 +OK! +Completed in 0.008335555 s (achieved 30.75979991734204 GB/s) +Achieved 393.11119655499846 GFLOPs +Completed in 0.008334253 s (achieved 30.764605298159296 GB/s) +Achieved 393.1726094708188 GFLOPs +Completed in 0.008332428000000001 s (achieved 30.77134347875553 GB/s) +Achieved 393.25872362773487 GFLOPs +Completed in 0.008333569 s (achieved 30.767130385552697 GB/s) +Achieved 393.20488016598887 GFLOPs +Completed in 0.008331473 s (achieved 30.774870662126613 GB/s) +Achieved 393.30380114056663 GFLOPs +Completed in 0.008330986 s (achieved 30.776669652307664 GB/s) +Achieved 393.3267922908525 GFLOPs +Completed in 0.008334731 s (achieved 30.762840936318163 GB/s) +Achieved 393.15006087179063 GFLOPs +Completed in 0.008332103 s (achieved 30.77254373835753 GB/s) +Achieved 393.27406298265873 GFLOPs +Completed in 0.008334567000000001 s (achieved 30.763446259415755 GB/s) +Achieved 393.157796919744 GFLOPs +Completed in 0.008332197000000001 s (achieved 30.7721965767252 GB/s) +Achieved 393.2696262462349 GFLOPs +Durations: [0.008335555, 0.008334253, 0.008332428000000001, 0.008333569, 0.008331473, 0.008330986, 0.008334731, 0.008332103, 0.008334567000000001, 0.008332197000000001] +Median duration 0.008333569 (30.767130385552697 GB/s) 6.747177716129977% of peak. +Median achieved 393.20488016598887 GFLOPs +./sycl_spmm 100000 100000 512 64 j 32 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 512 columns. +Using method j with WG size 32 +OK! +Completed in 0.016827082 s (achieved 27.408198521882763 GB/s) +Achieved 389.4674073615378 GFLOPs +Completed in 0.016826137 s (achieved 27.409737838221574 GB/s) +Achieved 389.48928087296565 GFLOPs +Completed in 0.016823204 s (achieved 27.41451652134754 GB/s) +Achieved 389.5571854208033 GFLOPs +Completed in 0.016820706 s (achieved 27.418587781036063 GB/s) +Achieved 389.6150375614436 GFLOPs +Completed in 0.016825444000000002 s (achieved 27.410866780098047 GB/s) +Achieved 389.5053230096038 GFLOPs +Completed in 0.016825265000000002 s (achieved 27.411158397802353 GB/s) +Achieved 389.5094668642663 GFLOPs +Completed in 0.016826802000000002 s (achieved 27.408654597587823 GB/s) +Achieved 389.47388814582825 GFLOPs +Completed in 0.016824804000000002 s (achieved 27.41190946414591 GB/s) +Achieved 389.52013943223346 GFLOPs +Completed in 0.016827268 s (achieved 27.4078955656973 GB/s) +Achieved 389.4631023883378 GFLOPs +Completed in 0.016836296 s (achieved 27.39319883660872 GB/s) +Achieved 389.254263526847 GFLOPs +Durations: [0.016827082, 0.016826137, 0.016823204, 0.016820706, 0.016825444000000002, 0.016825265000000002, 0.016826802000000002, 0.016824804000000002, 0.016827268, 0.016836296] +Median duration 0.016826137 (27.409737838221574 GB/s) 6.010907420662625% of peak. +Median achieved 389.48928087296565 GFLOPs +./sycl_spmm 100000 100000 1 128 j 32 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 1 columns. +Using method j with WG size 32 +OK! +Completed in 0.008981221000000001 s (achieved 11.5351803501996 GB/s) +Achieved 2.8503919455940343 GFLOPs +Completed in 0.008937282000000001 s (achieved 11.591891584040875 GB/s) +Achieved 2.8644055317936705 GFLOPs +Completed in 0.008959691 s (achieved 11.562899211591114 GB/s) +Achieved 2.857241393704314 GFLOPs +Completed in 0.009079740000000001 s (achieved 11.410018789084269 GB/s) +Achieved 2.8194639934623678 GFLOPs +Completed in 0.00898896 s (achieved 11.525249194567559 GB/s) +Achieved 2.847937914953454 GFLOPs +Completed in 0.008957333000000001 s (achieved 11.565943121685885 GB/s) +Achieved 2.8579935567874943 GFLOPs +Completed in 0.008938197 s (achieved 11.590704926284351 GB/s) +Achieved 2.8641123036334957 GFLOPs +Completed in 0.009017044 s (achieved 11.48935327364489 GB/s) +Achieved 2.8390678807822165 GFLOPs +Completed in 0.009077727 s (achieved 11.412548978395142 GB/s) +Achieved 2.820089213962923 GFLOPs +Completed in 0.009153618 s (achieved 11.317929588060153 GB/s) +Achieved 2.796708361655468 GFLOPs +Durations: [0.008981221000000001, 0.008937282000000001, 0.008959691, 0.009079740000000001, 0.00898896, 0.008957333000000001, 0.008938197, 0.009017044, 0.009077727, 0.009153618] +Median duration 0.00898896 (11.525249194567559 GB/s) 2.5274669286332365% of peak. +Median achieved 2.847937914953454 GFLOPs +./sycl_spmm 100000 100000 8 128 j 32 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 8 columns. +Using method j with WG size 32 +OK! +Completed in 0.002281884 s (achieved 47.85519509317739 GB/s) +Achieved 89.7503992315122 GFLOPs +Completed in 0.002280236 s (achieved 47.88978158401148 GB/s) +Achieved 89.81526473575542 GFLOPs +Completed in 0.0022812780000000003 s (achieved 47.86790737472592 GB/s) +Achieved 89.77424057918412 GFLOPs +Completed in 0.002282931 s (achieved 47.83324769780602 GB/s) +Achieved 89.70923781752492 GFLOPs +Completed in 0.002280457 s (achieved 47.88514056612337 GB/s) +Achieved 89.80656070252586 GFLOPs +Completed in 0.002281603 s (achieved 47.86108889232702 GB/s) +Achieved 89.76145280313885 GFLOPs +Completed in 0.0022821020000000003 s (achieved 47.85062367939732 GB/s) +Achieved 89.74182573785045 GFLOPs +Completed in 0.0022829 s (achieved 47.83389723597179 GB/s) +Achieved 89.7104559989487 GFLOPs +Completed in 0.0022834680000000003 s (achieved 47.821998819339704 GB/s) +Achieved 89.68814102058798 GFLOPs +Completed in 0.002281761 s (achieved 47.85777476256277 GB/s) +Achieved 89.75523729259987 GFLOPs +Durations: [0.002281884, 0.002280236, 0.0022812780000000003, 0.002282931, 0.002280457, 0.002281603, 0.0022821020000000003, 0.0022829, 0.0022834680000000003, 0.002281761] +Median duration 0.002281884 (47.85519509317739 GB/s) 10.494560327451183% of peak. +Median achieved 89.7503992315122 GFLOPs +./sycl_spmm 100000 100000 32 128 j 32 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 32 columns. +Using method j with WG size 32 +OK! +Completed in 0.002725312 s (achieved 47.113873200573 GB/s) +Achieved 300.5894370993119 GFLOPs +Completed in 0.0027241220000000003 s (achieved 47.13445433060634 GB/s) +Achieved 300.7207459871474 GFLOPs +Completed in 0.002725454 s (achieved 47.11141850128456 GB/s) +Achieved 300.5737759653988 GFLOPs +Completed in 0.0027231480000000003 s (achieved 47.15131311261819 GB/s) +Achieved 300.8283060634236 GFLOPs +Completed in 0.002723374 s (achieved 47.14740024689962 GB/s) +Achieved 300.8033417371246 GFLOPs +Completed in 0.0027249030000000003 s (achieved 47.12094485565174 GB/s) +Achieved 300.63455469791035 GFLOPs +Completed in 0.0027257360000000003 s (achieved 47.10654443423721 GB/s) +Achieved 300.54267911492525 GFLOPs +Completed in 0.002724985 s (achieved 47.119526896478334 GB/s) +Achieved 300.62550803031945 GFLOPs +Completed in 0.002725644 s (achieved 47.10813444455696 GB/s) +Achieved 300.5528234795153 GFLOPs +Completed in 0.002724663 s (achieved 47.12509547052242 GB/s) +Achieved 300.66103587856554 GFLOPs +Durations: [0.002725312, 0.0027241220000000003, 0.002725454, 0.0027231480000000003, 0.002723374, 0.0027249030000000003, 0.0027257360000000003, 0.002724985, 0.002725644, 0.002724663] +Median duration 0.002724985 (47.119526896478334 GB/s) 10.333229582561037% of peak. +Median achieved 300.62550803031945 GFLOPs +./sycl_spmm 100000 100000 64 128 j 32 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 64 columns. +Using method j with WG size 32 +OK! +Completed in 0.0047221170000000005 s (achieved 32.61249223600347 GB/s) +Achieved 346.96302527023363 GFLOPs +Completed in 0.004724231000000001 s (achieved 32.59789879030047 GB/s) +Achieved 346.80776617400795 GFLOPs +Completed in 0.004724957 s (achieved 32.59289005169782 GB/s) +Achieved 346.7544784005442 GFLOPs +Completed in 0.004724666000000001 s (achieved 32.59489750174932 GB/s) +Achieved 346.77583558287506 GFLOPs +Completed in 0.0047243120000000005 s (achieved 32.59733988779742 GB/s) +Achieved 346.80182003220784 GFLOPs +Completed in 0.0047239510000000005 s (achieved 32.59983094659533 GB/s) +Achieved 346.8283223090163 GFLOPs +Completed in 0.004724666000000001 s (achieved 32.59489750174932 GB/s) +Achieved 346.77583558287506 GFLOPs +Completed in 0.00472308 s (achieved 32.60584279749655 GB/s) +Achieved 346.89228215486503 GFLOPs +Completed in 0.004724331 s (achieved 32.59720878998529 GB/s) +Achieved 346.80042528772856 GFLOPs +Completed in 0.004723686 s (achieved 32.601659805499345 GB/s) +Achieved 346.84777946713643 GFLOPs +Durations: [0.0047221170000000005, 0.004724231000000001, 0.004724957, 0.004724666000000001, 0.0047243120000000005, 0.0047239510000000005, 0.004724666000000001, 0.00472308, 0.004724331, 0.004723686] +Median duration 0.0047243120000000005 (32.59733988779742 GB/s) 7.14853944907838% of peak. +Median achieved 346.80182003220784 GFLOPs +./sycl_spmm 100000 100000 128 128 j 32 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 128 columns. +Using method j with WG size 32 +OK! +Completed in 0.008437849 s (achieved 24.318994568402445 GB/s) +Achieved 388.3454183643248 GFLOPs +Completed in 0.008438842 s (achieved 24.316132948098804 GB/s) +Achieved 388.29972169167286 GFLOPs +Completed in 0.008437765 s (achieved 24.31923666990015 GB/s) +Achieved 388.3492844372888 GFLOPs +Completed in 0.008439721 s (achieved 24.313600414042124 GB/s) +Achieved 388.2592801349713 GFLOPs +Completed in 0.008437410000000001 s (achieved 24.320259890179567 GB/s) +Achieved 388.365624048138 GFLOPs +Completed in 0.008437881000000001 s (achieved 24.318902340528386 GB/s) +Achieved 388.34394559487146 GFLOPs +Completed in 0.008434622000000001 s (achieved 24.32829876667858 GB/s) +Achieved 388.49399534442676 GFLOPs +Completed in 0.008439821 s (achieved 24.31331233209804 GB/s) +Achieved 388.2546798089675 GFLOPs +Completed in 0.008438483 s (achieved 24.317167434004432 GB/s) +Achieved 388.3162412011733 GFLOPs +Completed in 0.008439965 s (achieved 24.312897506091556 GB/s) +Achieved 388.24805553103596 GFLOPs +Durations: [0.008437849, 0.008438842, 0.008437765, 0.008439721, 0.008437410000000001, 0.008437881000000001, 0.008434622000000001, 0.008439821, 0.008438483, 0.008439965] +Median duration 0.008438483 (24.317167434004432 GB/s) 5.332712156579919% of peak. +Median achieved 388.3162412011733 GFLOPs +./sycl_spmm 100000 100000 256 128 j 32 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 256 columns. +Using method j with WG size 32 +OK! +Completed in 0.016601819 s (achieved 18.528090445992696 GB/s) +Achieved 394.75192447285445 GFLOPs +Completed in 0.016605108 s (achieved 18.524420557818715 GB/s) +Achieved 394.6737353349343 GFLOPs +Completed in 0.016607141000000002 s (achieved 18.522152849789137 GB/s) +Achieved 394.6254204742405 GFLOPs +Completed in 0.016606079000000003 s (achieved 18.523337387471177 GB/s) +Achieved 394.65065775009253 GFLOPs +Completed in 0.01661026 s (achieved 18.51867484313912 GB/s) +Achieved 394.55131948566725 GFLOPs +Completed in 0.016610497000000002 s (achieved 18.518410617093515 GB/s) +Achieved 394.54568999350226 GFLOPs +Completed in 0.016612362000000002 s (achieved 18.51633163303328 GB/s) +Achieved 394.5013960085868 GFLOPs +Completed in 0.016622196000000002 s (achieved 18.505377027199053 GB/s) +Achieved 394.2680016527298 GFLOPs +Completed in 0.016617969 s (achieved 18.510084114370418 GB/s) +Achieved 394.3682889286892 GFLOPs +Completed in 0.016609733 s (achieved 18.519262410780474 GB/s) +Achieved 394.5638379617541 GFLOPs +Durations: [0.016601819, 0.016605108, 0.016607141000000002, 0.016606079000000003, 0.01661026, 0.016610497000000002, 0.016612362000000002, 0.016622196000000002, 0.016617969, 0.016609733] +Median duration 0.01661026 (18.51867484313912 GB/s) 4.061112904197175% of peak. +Median achieved 394.55131948566725 GFLOPs +./sycl_spmm 100000 100000 512 128 j 32 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 512 columns. +Using method j with WG size 32 +OK! +Completed in 0.033541890000000005 s (achieved 15.276420142096942 GB/s) +Achieved 390.7710626920546 GFLOPs +Completed in 0.033547594 s (achieved 15.273822736736353 GB/s) +Achieved 390.7046210228966 GFLOPs +Completed in 0.033577739 s (achieved 15.260110396355158 GB/s) +Achieved 390.3538591445958 GFLOPs +Completed in 0.033583234000000003 s (achieved 15.257613486539148 GB/s) +Achieved 390.289988153017 GFLOPs +Completed in 0.033560282000000004 s (achieved 15.268048224386195 GB/s) +Achieved 390.5569089079764 GFLOPs +Completed in 0.033558293 s (achieved 15.268953161592577 GB/s) +Achieved 390.5800572156635 GFLOPs +Completed in 0.033553311 s (achieved 15.271220297752434 GB/s) +Achieved 390.6380505935763 GFLOPs +Completed in 0.033560055000000005 s (achieved 15.268151497367928 GB/s) +Achieved 390.5595506324408 GFLOPs +Completed in 0.033557255 s (achieved 15.269425464031551 GB/s) +Achieved 390.59213871933207 GFLOPs +Completed in 0.033557663 s (achieved 15.26923981565701 GB/s) +Achieved 390.5873898310499 GFLOPs +Durations: [0.033541890000000005, 0.033547594, 0.033577739, 0.033583234000000003, 0.033560282000000004, 0.033558293, 0.033553311, 0.033560055000000005, 0.033557255, 0.033557663] +Median duration 0.033558293 (15.268953161592577 GB/s) 3.3484546407001266% of peak. +Median achieved 390.5800572156635 GFLOPs +./sycl_spmm 100000 100000 1 16 j 64 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 1 columns. +Using method j with WG size 64 +OK! +Completed in 0.0012670490000000001 s (achieved 11.049299592991273 GB/s) +Achieved 2.5255534710970133 GFLOPs +Completed in 0.001253729 s (achieved 11.166690728219576 GB/s) +Achieved 2.5523857229114104 GFLOPs +Completed in 0.001252752 s (achieved 11.175399440591596 GB/s) +Achieved 2.5543762851705685 GFLOPs +Completed in 0.0012501250000000002 s (achieved 11.198883311668833 GB/s) +Achieved 2.55974402559744 GFLOPs +Completed in 0.0012540890000000001 s (achieved 11.16348520719024 GB/s) +Achieved 2.5516530325997597 GFLOPs +Completed in 0.00124897 s (achieved 11.209239613441477 GB/s) +Achieved 2.562111179612 GFLOPs +Completed in 0.00124627 s (achieved 11.233524035722597 GB/s) +Achieved 2.567661903118907 GFLOPs +Completed in 0.0012502820000000001 s (achieved 11.197477049177706 GB/s) +Achieved 2.559422594262734 GFLOPs +Completed in 0.001250929 s (achieved 11.191685539307189 GB/s) +Achieved 2.5580988209562654 GFLOPs +Completed in 0.001252883 s (achieved 11.174230953728323 GB/s) +Achieved 2.554109202535273 GFLOPs +Durations: [0.0012670490000000001, 0.001253729, 0.001252752, 0.0012501250000000002, 0.0012540890000000001, 0.00124897, 0.00124627, 0.0012502820000000001, 0.001250929, 0.001252883] +Median duration 0.001252752 (11.175399440591596 GB/s) 2.450745491357806% of peak. +Median achieved 2.5543762851705685 GFLOPs +./sycl_spmm 100000 100000 8 16 j 64 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 8 columns. +Using method j with WG size 64 +OK! +Completed in 0.00041601100000000004 s (achieved 47.11414842395994 GB/s) +Achieved 61.53683436255291 GFLOPs +Completed in 0.000414388 s (achieved 47.298676602604324 GB/s) +Achieved 61.77785070996264 GFLOPs +Completed in 0.00041592500000000004 s (achieved 47.123890124421465 GB/s) +Achieved 61.549558213620244 GFLOPs +Completed in 0.00041576700000000005 s (achieved 47.14179817060998 GB/s) +Achieved 61.57294830999093 GFLOPs +Completed in 0.00041458500000000003 s (achieved 47.27620150270752 GB/s) +Achieved 61.74849548343524 GFLOPs +Completed in 0.00041583900000000004 s (achieved 47.1336358542609 GB/s) +Achieved 61.56228732754744 GFLOPs +Completed in 0.00041696500000000004 s (achieved 47.00635305121533 GB/s) +Achieved 61.396040435048505 GFLOPs +Completed in 0.000416161 s (achieved 47.097166721533256 GB/s) +Achieved 61.51465418431809 GFLOPs +Completed in 0.000415548 s (achieved 47.166642602058005 GB/s) +Achieved 61.60539817301491 GFLOPs +Completed in 0.000414447 s (achieved 47.291943240028274 GB/s) +Achieved 61.769056115739765 GFLOPs +Durations: [0.00041601100000000004, 0.000414388, 0.00041592500000000004, 0.00041576700000000005, 0.00041458500000000003, 0.00041583900000000004, 0.00041696500000000004, 0.000416161, 0.000415548, 0.000414447] +Median duration 0.00041583900000000004 (47.1336358542609 GB/s) 10.336323652250197% of peak. +Median achieved 61.56228732754744 GFLOPs +./sycl_spmm 100000 100000 32 16 j 64 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 32 columns. +Using method j with WG size 64 +OK! +Completed in 0.00046125800000000003 s (achieved 84.11779091094354 GB/s) +Achieved 222.0015696204727 GFLOPs +Completed in 0.00046187400000000003 s (achieved 84.00560325976348 GB/s) +Achieved 221.7054867777792 GFLOPs +Completed in 0.000461656 s (achieved 84.04527180411388 GB/s) +Achieved 221.81017900774603 GFLOPs +Completed in 0.00046145500000000005 s (achieved 84.08188013999197 GB/s) +Achieved 221.90679481206183 GFLOPs +Completed in 0.000460417 s (achieved 84.27144088945455 GB/s) +Achieved 222.40707880030493 GFLOPs +Completed in 0.00046087900000000003 s (achieved 84.18696447440651 GB/s) +Achieved 222.18413075883257 GFLOPs +Completed in 0.000462031 s (achieved 83.97705781646685 GB/s) +Achieved 221.63015035787643 GFLOPs +Completed in 0.000461367 s (achieved 84.0979177097625 GB/s) +Achieved 221.94912076503087 GFLOPs +Completed in 0.000461949 s (achieved 83.99196448092755 GB/s) +Achieved 221.66949165384057 GFLOPs +Completed in 0.000461621 s (achieved 84.0516440976472 GB/s) +Achieved 221.8269966054404 GFLOPs +Durations: [0.00046125800000000003, 0.00046187400000000003, 0.000461656, 0.00046145500000000005, 0.000460417, 0.00046087900000000003, 0.000462031, 0.000461367, 0.000461949, 0.000461621] +Median duration 0.000461621 (84.0516440976472 GB/s) 18.432378091589296% of peak. +Median achieved 221.8269966054404 GFLOPs +./sycl_spmm 100000 100000 64 16 j 64 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 64 columns. +Using method j with WG size 64 +OK! +Completed in 0.0007169990000000001 s (achieved 89.81881983099001 GB/s) +Achieved 285.634986938615 GFLOPs +Completed in 0.000716472 s (achieved 89.88488594111146 GB/s) +Achieved 285.8450853627218 GFLOPs +Completed in 0.000717053 s (achieved 89.8120557336766 GB/s) +Achieved 285.61347627023383 GFLOPs +Completed in 0.000715742 s (achieved 89.97656138664492 GB/s) +Achieved 286.13662464966427 GFLOPs +Completed in 0.000716595 s (achieved 89.86945764343878 GB/s) +Achieved 285.7960214626114 GFLOPs +Completed in 0.0007162560000000001 s (achieved 89.91199236027343 GB/s) +Achieved 285.93128713755976 GFLOPs +Completed in 0.000715818 s (achieved 89.96700837363689 GB/s) +Achieved 286.106244883476 GFLOPs +Completed in 0.000716873 s (achieved 89.83460668765598 GB/s) +Achieved 285.6851911007947 GFLOPs +Completed in 0.0007167250000000001 s (achieved 89.8531570686107 GB/s) +Achieved 285.7441836129617 GFLOPs +Completed in 0.000715661 s (achieved 89.98674512094415 GB/s) +Achieved 286.16901018778447 GFLOPs +Durations: [0.0007169990000000001, 0.000716472, 0.000717053, 0.000715742, 0.000716595, 0.0007162560000000001, 0.000715818, 0.000716873, 0.0007167250000000001, 0.000715661] +Median duration 0.000716595 (89.86945764343878 GB/s) 19.70821439549096% of peak. +Median achieved 285.7960214626114 GFLOPs +./sycl_spmm 100000 100000 128 16 j 64 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 128 columns. +Using method j with WG size 64 +OK! +Completed in 0.001217283 s (achieved 94.9655946891561 GB/s) +Achieved 336.4870781897061 GFLOPs +Completed in 0.001216592 s (achieved 95.01953325354762 GB/s) +Achieved 336.67819614135226 GFLOPs +Completed in 0.001217285 s (achieved 94.9654386606259 GB/s) +Achieved 336.4865253412307 GFLOPs +Completed in 0.001216251 s (achieved 95.04617385720546 GB/s) +Achieved 336.7725905261332 GFLOPs +Completed in 0.00122903 s (achieved 94.05791884657006 GB/s) +Achieved 333.27095351618755 GFLOPs +Completed in 0.001218731 s (achieved 94.85276406360387 GB/s) +Achieved 336.08729079673856 GFLOPs +Completed in 0.0012161720000000002 s (achieved 95.05234785869104 GB/s) +Achieved 336.79446657216243 GFLOPs +Completed in 0.001218749 s (achieved 94.85136316009284 GB/s) +Achieved 336.08232704190937 GFLOPs +Completed in 0.0012168440000000001 s (achieved 94.99985536354701 GB/s) +Achieved 336.608472408953 GFLOPs +Completed in 0.001217807 s (achieved 94.92473273679656 GB/s) +Achieved 336.3422939759749 GFLOPs +Durations: [0.001217283, 0.001216592, 0.001217285, 0.001216251, 0.00122903, 0.001218731, 0.0012161720000000002, 0.001218749, 0.0012168440000000001, 0.001217807] +Median duration 0.001217285 (94.9654386606259 GB/s) 20.825754092242523% of peak. +Median achieved 336.4865253412307 GFLOPs +./sycl_spmm 100000 100000 256 16 j 64 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 256 columns. +Using method j with WG size 64 +OK! +Completed in 0.002332209 s (achieved 93.47361407146616 GB/s) +Achieved 351.25496900149176 GFLOPs +Completed in 0.0023344340000000002 s (achieved 93.38452232960967 GB/s) +Achieved 350.9201802235574 GFLOPs +Completed in 0.0023324590000000003 s (achieved 93.46359528720548 GB/s) +Achieved 351.2173204330708 GFLOPs +Completed in 0.002331746 s (achieved 93.49217453358986 GB/s) +Achieved 351.3247154707245 GFLOPs +Completed in 0.002333351 s (achieved 93.42786576044497 GB/s) +Achieved 351.08305608543253 GFLOPs +Completed in 0.002332673 s (achieved 93.45502091377575 GB/s) +Achieved 351.1850996689206 GFLOPs +Completed in 0.0023335680000000003 s (achieved 93.41917784268554 GB/s) +Achieved 351.0504086446163 GFLOPs +Completed in 0.0023332 s (achieved 93.43391222355564 GB/s) +Achieved 351.10577747299845 GFLOPs +Completed in 0.0023326640000000004 s (achieved 93.455381486575 GB/s) +Achieved 351.18645462869915 GFLOPs +Completed in 0.0023327390000000003 s (achieved 93.45237679826161 GB/s) +Achieved 351.1751636166755 GFLOPs +Durations: [0.002332209, 0.0023344340000000002, 0.0023324590000000003, 0.002331746, 0.002333351, 0.002332673, 0.0023335680000000003, 0.0023332, 0.0023326640000000004, 0.0023327390000000003] +Median duration 0.0023327390000000003 (93.45237679826161 GB/s) 20.49394228032053% of peak. +Median achieved 351.1751636166755 GFLOPs +./sycl_spmm 100000 100000 512 16 j 64 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 512 columns. +Using method j with WG size 64 +OK! +Completed in 0.004589945000000001 s (achieved 92.11439439906141 GB/s) +Achieved 356.95416829613424 GFLOPs +Completed in 0.004590917 s (achieved 92.0948917177113 GB/s) +Achieved 356.87859310024555 GFLOPs +Completed in 0.004592336000000001 s (achieved 92.0664350343703 GB/s) +Achieved 356.76832008807713 GFLOPs +Completed in 0.004592255 s (achieved 92.0680589383647 GB/s) +Achieved 356.77461290803757 GFLOPs +Completed in 0.004589873 s (achieved 92.11583937071896 GB/s) +Achieved 356.9597677321355 GFLOPs +Completed in 0.004588511 s (achieved 92.14318196033528 GB/s) +Achieved 357.0657234994097 GFLOPs +Completed in 0.004589445 s (achieved 92.12442986025543 GB/s) +Achieved 356.993056894679 GFLOPs +Completed in 0.004589844 s (achieved 92.11642138599917 GB/s) +Achieved 356.96202311015367 GFLOPs +Completed in 0.004590614000000001 s (achieved 92.10097037128365 GB/s) +Achieved 356.90214860147245 GFLOPs +Completed in 0.004589515000000001 s (achieved 92.12302476405458 GB/s) +Achieved 356.9876119807866 GFLOPs +Durations: [0.004589945000000001, 0.004590917, 0.004592336000000001, 0.004592255, 0.004589873, 0.004588511, 0.004589445, 0.004589844, 0.004590614000000001, 0.004589515000000001] +Median duration 0.004589945000000001 (92.11439439906141 GB/s) 20.200525087513466% of peak. +Median achieved 356.95416829613424 GFLOPs +./sycl_spmm 100000 100000 1 32 j 64 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 1 columns. +Using method j with WG size 64 +OK! +Completed in 0.002542181 s (achieved 10.542130556400194 GB/s) +Achieved 2.517523339211488 GFLOPs +Completed in 0.0025793210000000003 s (achieved 10.390332959720794 GB/s) +Achieved 2.481273172280612 GFLOPs +Completed in 0.0025594040000000004 s (achieved 10.471189386278992 GB/s) +Achieved 2.5005821667857044 GFLOPs +Completed in 0.0025354070000000004 s (achieved 10.570296603267247 GB/s) +Achieved 2.5242495583549305 GFLOPs +Completed in 0.002550661 s (achieved 10.507081889753284 GB/s) +Achieved 2.5091535096196633 GFLOPs +Completed in 0.0025738650000000003 s (achieved 10.412358068507867 GB/s) +Achieved 2.4865328989671176 GFLOPs +Completed in 0.0025584180000000002 s (achieved 10.475224924152347 GB/s) +Achieved 2.5015458771787875 GFLOPs +Completed in 0.00256848 s (achieved 10.434188313710834 GB/s) +Achieved 2.4917460910733196 GFLOPs +Completed in 0.002580639 s (achieved 10.385026344250397 GB/s) +Achieved 2.4800059210141363 GFLOPs +Completed in 0.0025679780000000003 s (achieved 10.436228036221493 GB/s) +Achieved 2.492233188913612 GFLOPs +Durations: [0.002542181, 0.0025793210000000003, 0.0025594040000000004, 0.0025354070000000004, 0.002550661, 0.0025738650000000003, 0.0025584180000000002, 0.00256848, 0.002580639, 0.0025679780000000003] +Median duration 0.0025679780000000003 (10.436228036221493 GB/s) 2.2886464991713797% of peak. +Median achieved 2.492233188913612 GFLOPs +./sycl_spmm 100000 100000 8 32 j 64 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 8 columns. +Using method j with WG size 64 +OK! +Completed in 0.0007751100000000001 s (achieved 41.800523796622414 GB/s) +Achieved 66.05514056069461 GFLOPs +Completed in 0.0007802960000000001 s (achieved 41.522709330818046 GB/s) +Achieved 65.61612516275875 GFLOPs +Completed in 0.000775197 s (achieved 41.795832543211596 GB/s) +Achieved 66.04772722288656 GFLOPs +Completed in 0.000774494 s (achieved 41.83377017769021 GB/s) +Achieved 66.10767804527859 GFLOPs +Completed in 0.000775181 s (achieved 41.79669522343814 GB/s) +Achieved 66.04909047048368 GFLOPs +Completed in 0.000775107 s (achieved 41.80068558276471 GB/s) +Achieved 66.05539622271506 GFLOPs +Completed in 0.0007754530000000001 s (achieved 41.78203450112386 GB/s) +Achieved 66.02592291215586 GFLOPs +Completed in 0.0007752630000000001 s (achieved 41.79227436366755 GB/s) +Achieved 66.04210442133831 GFLOPs +Completed in 0.0007744140000000001 s (achieved 41.83809177003515 GB/s) +Achieved 66.11450722740032 GFLOPs +Completed in 0.000775084 s (achieved 41.80192598479649 GB/s) +Achieved 66.05735636395539 GFLOPs +Durations: [0.0007751100000000001, 0.0007802960000000001, 0.000775197, 0.000774494, 0.000775181, 0.000775107, 0.0007754530000000001, 0.0007752630000000001, 0.0007744140000000001, 0.000775084] +Median duration 0.000775181 (41.79669522343814 GB/s) 9.165941934964504% of peak. +Median achieved 66.04909047048368 GFLOPs +./sycl_spmm 100000 100000 32 32 j 64 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 32 columns. +Using method j with WG size 64 +OK! +Completed in 0.000866075 s (achieved 59.57914037467887 GB/s) +Achieved 236.46912796235893 GFLOPs +Completed in 0.0008658800000000001 s (achieved 59.59255786021158 GB/s) +Achieved 236.52238185429852 GFLOPs +Completed in 0.000867404 s (achieved 59.48785571659804 GB/s) +Achieved 236.106819890155 GFLOPs +Completed in 0.000865877 s (achieved 59.59276433026862 GB/s) +Achieved 236.52320133229085 GFLOPs +Completed in 0.0008660470000000001 s (achieved 59.58106661647693 GB/s) +Achieved 236.47677320053066 GFLOPs +Completed in 0.000868272 s (achieved 59.428386496397444 GB/s) +Achieved 235.87078703447767 GFLOPs +Completed in 0.000868237 s (achieved 59.430782148192264 GB/s) +Achieved 235.88029535714327 GFLOPs +Completed in 0.00086625 s (achieved 59.567104184704185 GB/s) +Achieved 236.42135642135642 GFLOPs +Completed in 0.0008664530000000001 s (achieved 59.55314829540667 GB/s) +Achieved 236.36596560921365 GFLOPs +Completed in 0.000868019 s (achieved 59.44570798565469 GB/s) +Achieved 235.9395358857352 GFLOPs +Durations: [0.000866075, 0.0008658800000000001, 0.000867404, 0.000865877, 0.0008660470000000001, 0.000868272, 0.000868237, 0.00086625, 0.0008664530000000001, 0.000868019] +Median duration 0.0008664530000000001 (59.55314829540667 GB/s) 13.059900941975146% of peak. +Median achieved 236.36596560921365 GFLOPs +./sycl_spmm 100000 100000 64 32 j 64 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 64 columns. +Using method j with WG size 64 +OK! +Completed in 0.0013779900000000002 s (achieved 56.023631521273735 GB/s) +Achieved 297.24453733336236 GFLOPs +Completed in 0.001379584 s (achieved 55.958900654110224 GB/s) +Achieved 296.9010948227872 GFLOPs +Completed in 0.001377865 s (achieved 56.02871398867088 GB/s) +Achieved 297.2715033766008 GFLOPs +Completed in 0.0013784770000000001 s (achieved 56.00383901943957 GB/s) +Achieved 297.13952427207704 GFLOPs +Completed in 0.001378279 s (achieved 56.01188438625271 GB/s) +Achieved 297.18221056839724 GFLOPs +Completed in 0.0013784670000000002 s (achieved 56.00424529567991 GB/s) +Achieved 297.1416798516032 GFLOPs +Completed in 0.001379604 s (achieved 55.958089422761894 GB/s) +Achieved 296.8967906732657 GFLOPs +Completed in 0.0013781910000000002 s (achieved 56.015460846863746 GB/s) +Achieved 297.20118619262496 GFLOPs +Completed in 0.001377532 s (achieved 56.04225818347595 GB/s) +Achieved 297.34336480023694 GFLOPs +Completed in 0.001378743 s (achieved 55.993034234806636 GB/s) +Achieved 297.0821973348188 GFLOPs +Durations: [0.0013779900000000002, 0.001379584, 0.001377865, 0.0013784770000000001, 0.001378279, 0.0013784670000000002, 0.001379604, 0.0013781910000000002, 0.001377532, 0.001378743] +Median duration 0.0013784670000000002 (56.00424529567991 GB/s) 12.281632740280681% of peak. +Median achieved 297.1416798516032 GFLOPs +./sycl_spmm 100000 100000 128 32 j 64 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 128 columns. +Using method j with WG size 64 +OK! +Completed in 0.002369197 s (achieved 54.19557934608224 GB/s) +Achieved 345.77116212792777 GFLOPs +Completed in 0.0023692870000000003 s (achieved 54.19352066676599 GB/s) +Achieved 345.75802762603263 GFLOPs +Completed in 0.0023689310000000003 s (achieved 54.201664801549725 GB/s) +Achieved 345.8099877117569 GFLOPs +Completed in 0.0023693430000000003 s (achieved 54.19223978968009 GB/s) +Achieved 345.7498555506737 GFLOPs +Completed in 0.002367178 s (achieved 54.24180353146236 GB/s) +Achieved 346.0660753014771 GFLOPs +Completed in 0.002368883 s (achieved 54.202763074411024 GB/s) +Achieved 345.8169947608219 GFLOPs +Completed in 0.0023686500000000004 s (achieved 54.20809490638127 GB/s) +Achieved 345.85101217993366 GFLOPs +Completed in 0.002369284 s (achieved 54.19358928689005 GB/s) +Achieved 345.75846542668586 GFLOPs +Completed in 0.002367449 s (achieved 54.2355945154468 GB/s) +Achieved 346.0264613936773 GFLOPs +Completed in 0.0023694090000000003 s (achieved 54.190730262272155 GB/s) +Achieved 345.7402246720595 GFLOPs +Durations: [0.002369197, 0.0023692870000000003, 0.0023689310000000003, 0.0023693430000000003, 0.002367178, 0.002368883, 0.0023686500000000004, 0.002369284, 0.002367449, 0.0023694090000000003] +Median duration 0.002369197 (54.19557934608224 GB/s) 11.88499547063207% of peak. +Median achieved 345.77116212792777 GFLOPs +./sycl_spmm 100000 100000 256 32 j 64 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 256 columns. +Using method j with WG size 64 +OK! +Completed in 0.004476264000000001 s (achieved 51.560856106789046 GB/s) +Achieved 366.01951984958885 GFLOPs +Completed in 0.004478422 s (achieved 51.53601067518871 GB/s) +Achieved 365.8431474300546 GFLOPs +Completed in 0.004489758 s (achieved 51.40588958246747 GB/s) +Achieved 364.9194455469538 GFLOPs +Completed in 0.004479975000000001 s (achieved 51.51814552536565 GB/s) +Achieved 365.7163265420008 GFLOPs +Completed in 0.004479993000000001 s (achieved 51.51793853249323 GB/s) +Achieved 365.71485714374995 GFLOPs +Completed in 0.004476464 s (achieved 51.55855246462386 GB/s) +Achieved 366.0031667852126 GFLOPs +Completed in 0.004480302 s (achieved 51.51438541419752 GB/s) +Achieved 365.6896343148297 GFLOPs +Completed in 0.004478585 s (achieved 51.534135000228865 GB/s) +Achieved 365.82983241358596 GFLOPs +Completed in 0.004478846 s (achieved 51.531131903173275 GB/s) +Achieved 365.80851406813275 GFLOPs +Completed in 0.004479464000000001 s (achieved 51.524022516979706 GB/s) +Achieved 365.7580460519383 GFLOPs +Durations: [0.004476264000000001, 0.004478422, 0.004489758, 0.004479975000000001, 0.004479993000000001, 0.004476464, 0.004480302, 0.004478585, 0.004478846, 0.004479464000000001] +Median duration 0.004479464000000001 (51.524022516979706 GB/s) 11.29912774495169% of peak. +Median achieved 365.7580460519383 GFLOPs +./sycl_spmm 100000 100000 512 32 j 64 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 512 columns. +Using method j with WG size 64 +OK! +Completed in 0.008944444000000001 s (achieved 48.700623985124174 GB/s) +Achieved 366.35032876274926 GFLOPs +Completed in 0.008950415 s (achieved 48.66813482950233 GB/s) +Achieved 366.105929166413 GFLOPs +Completed in 0.008948596000000001 s (achieved 48.67802770401077 GB/s) +Achieved 366.1803482915085 GFLOPs +Completed in 0.008947077000000001 s (achieved 48.6862920705835 GB/s) +Achieved 366.2425169695086 GFLOPs +Completed in 0.008951261 s (achieved 48.6635351153318 GB/s) +Achieved 366.0713278274424 GFLOPs +Completed in 0.008946757000000001 s (achieved 48.68803344049693 GB/s) +Achieved 366.2556164205644 GFLOPs +Completed in 0.008945515000000001 s (achieved 48.694793312626494 GB/s) +Achieved 366.3064675426736 GFLOPs +Completed in 0.008948833 s (achieved 48.67673851998356 GB/s) +Achieved 366.1706504077124 GFLOPs +Completed in 0.008949127000000001 s (achieved 48.675139373929994 GB/s) +Achieved 366.158620835306 GFLOPs +Completed in 0.00895105 s (achieved 48.664682243982554 GB/s) +Achieved 366.079957100005 GFLOPs +Durations: [0.008944444000000001, 0.008950415, 0.008948596000000001, 0.008947077000000001, 0.008951261, 0.008946757000000001, 0.008945515000000001, 0.008948833, 0.008949127000000001, 0.00895105] +Median duration 0.008948833 (48.67673851998356 GB/s) 10.674723359645517% of peak. +Median achieved 366.1706504077124 GFLOPs +./sycl_spmm 100000 100000 1 64 j 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 1 columns. +Using method j with WG size 64 +OK! +Completed in 0.005110557 s (achieved 10.253286285624052 GB/s) +Achieved 2.504619359494474 GFLOPs +Completed in 0.005062591 s (achieved 10.350432021863904 GB/s) +Achieved 2.5283496138637314 GFLOPs +Completed in 0.0051109350000000005 s (achieved 10.25252796210478 GB/s) +Achieved 2.5044341201756626 GFLOPs +Completed in 0.0051161060000000005 s (achieved 10.24216542815962 GB/s) +Achieved 2.5019028143670203 GFLOPs +Completed in 0.0051164750000000005 s (achieved 10.24142676354326 GB/s) +Achieved 2.5017223772225994 GFLOPs +Completed in 0.005082523 s (achieved 10.309840998260116 GB/s) +Achieved 2.5184342500761923 GFLOPs +Completed in 0.005104598 s (achieved 10.2652557556932 GB/s) +Achieved 2.5075431992881714 GFLOPs +Completed in 0.0051195 s (achieved 10.235375329622034 GB/s) +Achieved 2.5002441644691866 GFLOPs +Completed in 0.005097298 s (achieved 10.279956949740823 GB/s) +Achieved 2.5111343303844507 GFLOPs +Completed in 0.005125766 s (achieved 10.222863080366913 GB/s) +Achieved 2.497187737403541 GFLOPs +Durations: [0.005110557, 0.005062591, 0.0051109350000000005, 0.0051161060000000005, 0.0051164750000000005, 0.005082523, 0.005104598, 0.0051195, 0.005097298, 0.005125766] +Median duration 0.0051109350000000005 (10.25252796210478 GB/s) 2.2483613951984167% of peak. +Median achieved 2.5044341201756626 GFLOPs +./sycl_spmm 100000 100000 8 64 j 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 8 columns. +Using method j with WG size 64 +OK! +Completed in 0.001494321 s (achieved 38.813617689907325 GB/s) +Achieved 68.526106505898 GFLOPs +Completed in 0.001497302 s (achieved 38.73634310246029 GB/s) +Achieved 68.38967689884873 GFLOPs +Completed in 0.0014938610000000002 s (achieved 38.82556944722434 GB/s) +Achieved 68.54720753805073 GFLOPs +Completed in 0.001493996 s (achieved 38.822061103242575 GB/s) +Achieved 68.54101349668942 GFLOPs +Completed in 0.0014950240000000002 s (achieved 38.79536649578869 GB/s) +Achieved 68.49388371022806 GFLOPs +Completed in 0.001495494 s (achieved 38.783173987993266 GB/s) +Achieved 68.47235762898414 GFLOPs +Completed in 0.0014957800000000002 s (achieved 38.775758467154255 GB/s) +Achieved 68.45926539999198 GFLOPs +Completed in 0.0014956700000000002 s (achieved 38.77861025493591 GB/s) +Achieved 68.46430028014201 GFLOPs +Completed in 0.00149514 s (achieved 38.792356568615645 GB/s) +Achieved 68.48856963227524 GFLOPs +Completed in 0.0014939430000000002 s (achieved 38.82343837750168 GB/s) +Achieved 68.54344509797227 GFLOPs +Durations: [0.001494321, 0.001497302, 0.0014938610000000002, 0.001493996, 0.0014950240000000002, 0.001495494, 0.0014957800000000002, 0.0014956700000000002, 0.00149514, 0.0014939430000000002] +Median duration 0.00149514 (38.792356568615645 GB/s) 8.5070957387315% of peak. +Median achieved 68.48856963227524 GFLOPs +./sycl_spmm 100000 100000 32 64 j 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 32 columns. +Using method j with WG size 64 +OK! +Completed in 0.0016971680000000002 s (achieved 45.487543955577756 GB/s) +Achieved 241.34322589160294 GFLOPs +Completed in 0.001697598 s (achieved 45.47602200285345 GB/s) +Achieved 241.28209387617093 GFLOPs +Completed in 0.0016969060000000002 s (achieved 45.494567171074884 GB/s) +Achieved 241.38048896049634 GFLOPs +Completed in 0.0016966000000000001 s (achieved 45.50277260403159 GB/s) +Achieved 241.4240245196275 GFLOPs +Completed in 0.001697237 s (achieved 45.48569469084164 GB/s) +Achieved 241.33341424915906 GFLOPs +Completed in 0.0016972950000000001 s (achieved 45.48414035273774 GB/s) +Achieved 241.32516739871383 GFLOPs +Completed in 0.001696992 s (achieved 45.49226160170466 GB/s) +Achieved 241.36825630291716 GFLOPs +Completed in 0.0016979550000000001 s (achieved 45.46646053635108 GB/s) +Achieved 241.23136361093196 GFLOPs +Completed in 0.0016961440000000001 s (achieved 45.51500580139422 GB/s) +Achieved 241.48893018517296 GFLOPs +Completed in 0.0016968850000000002 s (achieved 45.49513019444452 GB/s) +Achieved 241.38347619314212 GFLOPs +Durations: [0.0016971680000000002, 0.001697598, 0.0016969060000000002, 0.0016966000000000001, 0.001697237, 0.0016972950000000001, 0.001696992, 0.0016979550000000001, 0.0016961440000000001, 0.0016968850000000002] +Median duration 0.0016971680000000002 (45.487543955577756 GB/s) 9.975338586749507% of peak. +Median achieved 241.34322589160294 GFLOPs +./sycl_spmm 100000 100000 64 64 j 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 64 columns. +Using method j with WG size 64 +OK! +Completed in 0.002741701 s (achieved 37.49497264654315 GB/s) +Achieved 298.7926108645691 GFLOPs +Completed in 0.002743681 s (achieved 37.46791409059581 GB/s) +Achieved 298.57698471505984 GFLOPs +Completed in 0.0027428 s (achieved 37.47994895726994 GB/s) +Achieved 298.67288901852123 GFLOPs +Completed in 0.002743151 s (achieved 37.47515320884632 GB/s) +Achieved 298.6346723166169 GFLOPs +Completed in 0.002741926 s (achieved 37.49189584255738 GB/s) +Achieved 298.7680922096366 GFLOPs +Completed in 0.002742258 s (achieved 37.48735676949433 GB/s) +Achieved 298.7319209206428 GFLOPs +Completed in 0.0027415630000000002 s (achieved 37.49686000285239 GB/s) +Achieved 298.80765096406685 GFLOPs +Completed in 0.002741807 s (achieved 37.49352306708678 GB/s) +Achieved 298.78105935246356 GFLOPs +Completed in 0.002740754 s (achieved 37.50792811029373 GB/s) +Achieved 298.8958512876384 GFLOPs +Completed in 0.002742401 s (achieved 37.485402025451414 GB/s) +Achieved 298.71634381696913 GFLOPs +Durations: [0.002741701, 0.002743681, 0.0027428, 0.002743151, 0.002741926, 0.002742258, 0.0027415630000000002, 0.002741807, 0.002740754, 0.002742401] +Median duration 0.002742258 (37.48735676949433 GB/s) 8.22091157225753% of peak. +Median achieved 298.7319209206428 GFLOPs +./sycl_spmm 100000 100000 128 64 j 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 128 columns. +Using method j with WG size 64 +OK! +Completed in 0.004691527 s (achieved 32.82513433259576 GB/s) +Achieved 349.2253161923613 GFLOPs +Completed in 0.00469428 s (achieved 32.80588375640141 GB/s) +Achieved 349.0205100675716 GFLOPs +Completed in 0.004692606 s (achieved 32.81758664588504 GB/s) +Achieved 349.14501664959727 GFLOPs +Completed in 0.004692915000000001 s (achieved 32.81542580677468 GB/s) +Achieved 349.1220275670878 GFLOPs +Completed in 0.004694487000000001 s (achieved 32.804437204746755 GB/s) +Achieved 349.00512026127666 GFLOPs +Completed in 0.004694856000000001 s (achieved 32.801858885554736 GB/s) +Achieved 348.9776896245593 GFLOPs +Completed in 0.004693279000000001 s (achieved 32.81288071729807 GB/s) +Achieved 349.0949504600088 GFLOPs +Completed in 0.0046909890000000004 s (achieved 32.82889898057744 GB/s) +Achieved 349.26536813452344 GFLOPs +Completed in 0.004694457 s (achieved 32.80464684200963 GB/s) +Achieved 349.00735058389074 GFLOPs +Completed in 0.004692847 s (achieved 32.81590130681865 GB/s) +Achieved 349.1270863933983 GFLOPs +Durations: [0.004691527, 0.00469428, 0.004692606, 0.004692915000000001, 0.004694487000000001, 0.004694856000000001, 0.004693279000000001, 0.0046909890000000004, 0.004694457, 0.004692847] +Median duration 0.004693279000000001 (32.81288071729807 GB/s) 7.195807174846068% of peak. +Median achieved 349.0949504600088 GFLOPs +./sycl_spmm 100000 100000 256 64 j 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 256 columns. +Using method j with WG size 64 +OK! +Completed in 0.008770595 s (achieved 29.234049001236517 GB/s) +Achieved 373.6120525460359 GFLOPs +Completed in 0.0087635 s (achieved 29.25771712215439 GB/s) +Achieved 373.9145318651224 GFLOPs +Completed in 0.008771542 s (achieved 29.230892812233016 GB/s) +Achieved 373.5717163527234 GFLOPs +Completed in 0.008765649 s (achieved 29.250544255194338 GB/s) +Achieved 373.8228624030006 GFLOPs +Completed in 0.008767261 s (achieved 29.245166078664706 GB/s) +Achieved 373.75412914021837 GFLOPs +Completed in 0.008771923000000001 s (achieved 29.229623196646845 GB/s) +Achieved 373.55549062617166 GFLOPs +Completed in 0.008766927 s (achieved 29.24628025304648 GB/s) +Achieved 373.7683683233589 GFLOPs +Completed in 0.008770932 s (achieved 29.232925759771025 GB/s) +Achieved 373.59769748528436 GFLOPs +Completed in 0.008769361 s (achieved 29.2381627350043 GB/s) +Achieved 373.6646261911216 GFLOPs +Completed in 0.008778560000000001 s (achieved 29.207524240877774 GB/s) +Achieved 373.2730652863339 GFLOPs +Durations: [0.008770595, 0.0087635, 0.008771542, 0.008765649, 0.008767261, 0.008771923000000001, 0.008766927, 0.008770932, 0.008769361, 0.008778560000000001] +Median duration 0.008770595 (29.234049001236517 GB/s) 6.410975658165903% of peak. +Median achieved 373.6120525460359 GFLOPs +./sycl_spmm 100000 100000 512 64 j 64 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 512 columns. +Using method j with WG size 64 +OK! +Completed in 0.01768654 s (achieved 26.076327195709283 GB/s) +Achieved 370.54166614838175 GFLOPs +Completed in 0.017664928 s (achieved 26.10823004769677 GB/s) +Achieved 370.9950020741664 GFLOPs +Completed in 0.017679816 s (achieved 26.08624456272622 GB/s) +Achieved 370.6825908142935 GFLOPs +Completed in 0.017684503 s (achieved 26.07933081297224 GB/s) +Achieved 370.5843472106624 GFLOPs +Completed in 0.017684819 s (achieved 26.078864816201964 GB/s) +Achieved 370.577725449155 GFLOPs +Completed in 0.017687984 s (achieved 26.074198393666574 GB/s) +Achieved 370.5114161116383 GFLOPs +Completed in 0.017670049 s (achieved 26.10066355786563 GB/s) +Achieved 370.88748310771524 GFLOPs +Completed in 0.017681449000000002 s (achieved 26.083835323677377 GB/s) +Achieved 370.6483557993465 GFLOPs +Completed in 0.017687564 s (achieved 26.074817538469404 GB/s) +Achieved 370.52021408940203 GFLOPs +Completed in 0.017673454 s (achieved 26.09563495624568 GB/s) +Achieved 370.816027246287 GFLOPs +Durations: [0.01768654, 0.017664928, 0.017679816, 0.017684503, 0.017684819, 0.017687984, 0.017670049, 0.017681449000000002, 0.017687564, 0.017673454] +Median duration 0.017684503 (26.07933081297224 GB/s) 5.719151494072859% of peak. +Median achieved 370.5843472106624 GFLOPs +./sycl_spmm 100000 100000 1 128 j 64 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 1 columns. +Using method j with WG size 64 +OK! +Completed in 0.010103951 s (achieved 10.253415124439936 GB/s) +Achieved 2.53366232674723 GFLOPs +Completed in 0.010153092 s (achieved 10.203788560174576 GB/s) +Achieved 2.5213993924215403 GFLOPs +Completed in 0.010249057 s (achieved 10.108247422177476 GB/s) +Achieved 2.4977907723608133 GFLOPs +Completed in 0.010085795 s (achieved 10.271872866739807 GB/s) +Achieved 2.538223313085384 GFLOPs +Completed in 0.010100786 s (achieved 10.256627949547688 GB/s) +Achieved 2.5344562294459063 GFLOPs +Completed in 0.010144151 s (achieved 10.212782124398583 GB/s) +Achieved 2.523621740252092 GFLOPs +Completed in 0.010113821 s (achieved 10.243408895609287 GB/s) +Achieved 2.531189745201146 GFLOPs +Completed in 0.010102225000000001 s (achieved 10.255166955794392 GB/s) +Achieved 2.534095211698413 GFLOPs +Completed in 0.010178815 s (achieved 10.178002449204548 GB/s) +Achieved 2.515027535130563 GFLOPs +Completed in 0.010067239 s (achieved 10.290806049205745 GB/s) +Achieved 2.5429017827032814 GFLOPs +Durations: [0.010103951, 0.010153092, 0.010249057, 0.010085795, 0.010100786, 0.010144151, 0.010113821, 0.010102225000000001, 0.010178815, 0.010067239] +Median duration 0.010113821 (10.243408895609287 GB/s) 2.246361599914317% of peak. +Median achieved 2.531189745201146 GFLOPs +./sycl_spmm 100000 100000 8 128 j 64 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 8 columns. +Using method j with WG size 64 +OK! +Completed in 0.002947298 s (achieved 37.05088660868361 GB/s) +Achieved 69.48737453762735 GFLOPs +Completed in 0.002947105 s (achieved 37.05331299699196 GB/s) +Achieved 69.49192512652246 GFLOPs +Completed in 0.0029507160000000004 s (achieved 37.007968235506226 GB/s) +Achieved 69.40688293959839 GFLOPs +Completed in 0.002948272 s (achieved 37.03864636641396 GB/s) +Achieved 69.46441847970608 GFLOPs +Completed in 0.002945346 s (achieved 37.07544173078477 GB/s) +Achieved 69.53342663306789 GFLOPs +Completed in 0.002946586 s (achieved 37.059839420943426 GB/s) +Achieved 69.50416515927246 GFLOPs +Completed in 0.002964506 s (achieved 36.83581817678898 GB/s) +Achieved 69.08402276804297 GFLOPs +Completed in 0.00294719 s (achieved 37.05224434121994 GB/s) +Achieved 69.48992090771209 GFLOPs +Completed in 0.0029476850000000002 s (achieved 37.04602221743504 GB/s) +Achieved 69.47825157708506 GFLOPs +Completed in 0.002948112 s (achieved 37.04065652865291 GB/s) +Achieved 69.46818845416999 GFLOPs +Durations: [0.002947298, 0.002947105, 0.0029507160000000004, 0.002948272, 0.002945346, 0.002946586, 0.002964506, 0.00294719, 0.0029476850000000002, 0.002948112] +Median duration 0.0029476850000000002 (37.04602221743504 GB/s) 8.124127679262068% of peak. +Median achieved 69.47825157708506 GFLOPs +./sycl_spmm 100000 100000 32 128 j 64 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 32 columns. +Using method j with WG size 64 +OK! +Completed in 0.003360651 s (achieved 38.20688432092473 GB/s) +Achieved 243.76229486489373 GFLOPs +Completed in 0.003360656 s (achieved 38.2068274765403 GB/s) +Achieved 243.76193219419068 GFLOPs +Completed in 0.003363052 s (achieved 38.179607094983965 GB/s) +Achieved 243.58826446929754 GFLOPs +Completed in 0.003360431 s (achieved 38.209385641306135 GB/s) +Achieved 243.77825344427546 GFLOPs +Completed in 0.003362238 s (achieved 38.18885040261873 GB/s) +Achieved 243.64723734607725 GFLOPs +Completed in 0.003359602 s (achieved 38.218814014279076 GB/s) +Achieved 243.83840704940644 GFLOPs +Completed in 0.003359842 s (achieved 38.21608397061529 GB/s) +Achieved 243.82098920127794 GFLOPs +Completed in 0.0033615510000000004 s (achieved 38.19665505595483 GB/s) +Achieved 243.6970315190815 GFLOPs +Completed in 0.00336163 s (achieved 38.19575741530151 GB/s) +Achieved 243.691304515964 GFLOPs +Completed in 0.00336067 s (achieved 38.206668313163746 GB/s) +Achieved 243.76091672196318 GFLOPs +Durations: [0.003360651, 0.003360656, 0.003363052, 0.003360431, 0.003362238, 0.003359602, 0.003359842, 0.0033615510000000004, 0.00336163, 0.00336067] +Median duration 0.00336067 (38.206668313163746 GB/s) 8.378655331834155% of peak. +Median achieved 243.76091672196318 GFLOPs +./sycl_spmm 100000 100000 64 128 j 64 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 64 columns. +Using method j with WG size 64 +OK! +Completed in 0.005451092 s (achieved 28.251220856298147 GB/s) +Achieved 300.563630186392 GFLOPs +Completed in 0.005452276000000001 s (achieved 28.245085905409038 GB/s) +Achieved 300.4983606846022 GFLOPs +Completed in 0.005456494 s (achieved 28.223251780355664 GB/s) +Achieved 300.26606828487303 GFLOPs +Completed in 0.005453937000000001 s (achieved 28.236483846439732 GB/s) +Achieved 300.4068437167499 GFLOPs +Completed in 0.005454037 s (achieved 28.235966129309354 GB/s) +Achieved 300.40133574451363 GFLOPs +Completed in 0.005453783 s (achieved 28.23728116795259 GB/s) +Achieved 300.4153263890404 GFLOPs +Completed in 0.005451872 s (achieved 28.247178950643008 GB/s) +Achieved 300.5206285107207 GFLOPs +Completed in 0.0054514590000000005 s (achieved 28.249318943791007 GB/s) +Achieved 300.5433958138546 GFLOPs +Completed in 0.005448095 s (achieved 28.26676186813923 GB/s) +Achieved 300.72897040158074 GFLOPs +Completed in 0.005451946 s (achieved 28.246795547864927 GB/s) +Achieved 300.5165495036085 GFLOPs +Durations: [0.005451092, 0.005452276000000001, 0.005456494, 0.005453937000000001, 0.005454037, 0.005453783, 0.005451872, 0.0054514590000000005, 0.005448095, 0.005451946] +Median duration 0.005452276000000001 (28.245085905409038 GB/s) 6.194097786273911% of peak. +Median achieved 300.4983606846022 GFLOPs +./sycl_spmm 100000 100000 128 128 j 64 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 128 columns. +Using method j with WG size 64 +OK! +Completed in 0.009355922000000001 s (achieved 21.932633042472993 GB/s) +Achieved 350.23806312194563 GFLOPs +Completed in 0.009358812000000001 s (achieved 21.925860248074223 GB/s) +Achieved 350.12990965092575 GFLOPs +Completed in 0.009354727 s (achieved 21.935434780726364 GB/s) +Achieved 350.2828035494783 GFLOPs +Completed in 0.009353059 s (achieved 21.93934668860744 GB/s) +Achieved 350.3452720655349 GFLOPs +Completed in 0.009358568000000001 s (achieved 21.926431907103737 GB/s) +Achieved 350.13903836569864 GFLOPs +Completed in 0.009356921 s (achieved 21.93029138538201 GB/s) +Achieved 350.2006696433581 GFLOPs +Completed in 0.009355323 s (achieved 21.934037338956657 GB/s) +Achieved 350.2604880665264 GFLOPs +Completed in 0.009476733000000001 s (achieved 21.65303211560355 GB/s) +Achieved 345.77316887581406 GFLOPs +Completed in 0.009364212 s (achieved 21.913216403045983 GB/s) +Achieved 349.92800248435213 GFLOPs +Completed in 0.009356249 s (achieved 21.93186649906389 GB/s) +Achieved 350.2258223354252 GFLOPs +Durations: [0.009355922000000001, 0.009358812000000001, 0.009354727, 0.009353059, 0.009358568000000001, 0.009356921, 0.009355323, 0.009476733000000001, 0.009364212, 0.009356249] +Median duration 0.009356921 (21.93029138538201 GB/s) 4.809274426618861% of peak. +Median achieved 350.2006696433581 GFLOPs +./sycl_spmm 100000 100000 256 128 j 64 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 256 columns. +Using method j with WG size 64 +OK! +Completed in 0.017378644000000002 s (achieved 17.699885215440283 GB/s) +Achieved 377.10652223499136 GFLOPs +Completed in 0.017386290000000002 s (achieved 17.692101305108796 GB/s) +Achieved 376.94068142197096 GFLOPs +Completed in 0.017382088 s (achieved 17.69637824868911 GB/s) +Achieved 377.03180423433594 GFLOPs +Completed in 0.017389591 s (achieved 17.688742880726753 GB/s) +Achieved 376.8691282043379 GFLOPs +Completed in 0.017392293 s (achieved 17.685994825409164 GB/s) +Achieved 376.8105792605955 GFLOPs +Completed in 0.017385114 s (achieved 17.69329807098188 GB/s) +Achieved 376.96617922666485 GFLOPs +Completed in 0.017384491000000002 s (achieved 17.6939321375587 GB/s) +Achieved 376.9796883900713 GFLOPs +Completed in 0.017386838 s (achieved 17.691543683791153 GB/s) +Achieved 376.9288009700211 GFLOPs +Completed in 0.017380673000000003 s (achieved 17.697818950969275 GB/s) +Achieved 377.0624992484468 GFLOPs +Completed in 0.017385234 s (achieved 17.693175944597584 GB/s) +Achieved 376.9635772518219 GFLOPs +Durations: [0.017378644000000002, 0.017386290000000002, 0.017382088, 0.017389591, 0.017392293, 0.017385114, 0.017384491000000002, 0.017386838, 0.017380673000000003, 0.017385234] +Median duration 0.017385234 (17.693175944597584 GB/s) 3.8800824439906982% of peak. +Median achieved 376.9635772518219 GFLOPs +./sycl_spmm 100000 100000 512 128 j 64 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 512 columns. +Using method j with WG size 64 +OK! +Completed in 0.035244806000000004 s (achieved 14.538312510501548 GB/s) +Achieved 371.8902580993069 GFLOPs +Completed in 0.035231611 s (achieved 14.543757422843935 GB/s) +Achieved 372.0295390409482 GFLOPs +Completed in 0.035229206 s (achieved 14.544750284749536 GB/s) +Achieved 372.0549364637966 GFLOPs +Completed in 0.035228274000000004 s (achieved 14.54513508098637 GB/s) +Achieved 372.06477955746567 GFLOPs +Completed in 0.035209301000000005 s (achieved 14.552972920422363 GB/s) +Achieved 372.2652716110439 GFLOPs +Completed in 0.035263969 s (achieved 14.53041216092267 GB/s) +Achieved 371.6881670353102 GFLOPs +Completed in 0.035207681000000005 s (achieved 14.553642541807852 GB/s) +Achieved 372.28240053640565 GFLOPs +Completed in 0.035217284 s (achieved 14.549674074809404 GB/s) +Achieved 372.18088708941895 GFLOPs +Completed in 0.035187309 s (achieved 14.562068500322093 GB/s) +Achieved 372.4979366850702 GFLOPs +Completed in 0.035241715 s (achieved 14.53958764492591 GB/s) +Achieved 371.9228760575358 GFLOPs +Durations: [0.035244806000000004, 0.035231611, 0.035229206, 0.035228274000000004, 0.035209301000000005, 0.035263969, 0.035207681000000005, 0.035217284, 0.035187309, 0.035241715] +Median duration 0.035229206 (14.544750284749536 GB/s) 3.1896382203398104% of peak. +Median achieved 372.0549364637966 GFLOPs +./sycl_spmm 100000 100000 1 16 j 128 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 1 columns. +Using method j with WG size 128 +OK! +Completed in 0.0014414940000000002 s (achieved 9.712148645780003 GB/s) +Achieved 2.2199190562014133 GFLOPs +Completed in 0.00144395 s (achieved 9.695629350046747 GB/s) +Achieved 2.2161432182554797 GFLOPs +Completed in 0.001442818 s (achieved 9.703236305618589 GB/s) +Achieved 2.2178819504608343 GFLOPs +Completed in 0.001443251 s (achieved 9.700325168664357 GB/s) +Achieved 2.2172165479185533 GFLOPs +Completed in 0.0014424470000000001 s (achieved 9.705731995698976 GB/s) +Achieved 2.218452393744796 GFLOPs +Completed in 0.0014458250000000002 s (achieved 9.683055694845503 GB/s) +Achieved 2.213269240744903 GFLOPs +Completed in 0.001443058 s (achieved 9.701622526606693 GB/s) +Achieved 2.217513086792076 GFLOPs +Completed in 0.0014402990000000001 s (achieved 9.720206707079571 GB/s) +Achieved 2.221760898257931 GFLOPs +Completed in 0.0014430390000000001 s (achieved 9.701750264545865 GB/s) +Achieved 2.2175422840269734 GFLOPs +Completed in 0.001442591 s (achieved 9.70476316572057 GB/s) +Achieved 2.218230946955859 GFLOPs +Durations: [0.0014414940000000002, 0.00144395, 0.001442818, 0.001443251, 0.0014424470000000001, 0.0014458250000000002, 0.001443058, 0.0014402990000000001, 0.0014430390000000001, 0.001442591] +Median duration 0.0014430390000000001 (9.701750264545865 GB/s) 2.1275768124004086% of peak. +Median achieved 2.2175422840269734 GFLOPs +./sycl_spmm 100000 100000 8 16 j 128 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 8 columns. +Using method j with WG size 128 +OK! +Completed in 0.000618995 s (achieved 31.664236383169495 GB/s) +Achieved 41.35736152957617 GFLOPs +Completed in 0.0006198940000000001 s (achieved 31.618315389405282 GB/s) +Achieved 41.297383100981776 GFLOPs +Completed in 0.000619555 s (achieved 31.635615885595307 GB/s) +Achieved 41.31997966282251 GFLOPs +Completed in 0.000619039 s (achieved 31.66198575534013 GB/s) +Achieved 41.35442193464386 GFLOPs +Completed in 0.000620057 s (achieved 31.61000359644355 GB/s) +Achieved 41.286526883818745 GFLOPs +Completed in 0.0006202600000000001 s (achieved 31.599658207848318 GB/s) +Achieved 41.27301454228871 GFLOPs +Completed in 0.000618894 s (achieved 31.66940380743714 GB/s) +Achieved 41.36411081703813 GFLOPs +Completed in 0.000617529 s (achieved 31.739406570379693 GB/s) +Achieved 41.45554297854838 GFLOPs +Completed in 0.000619066 s (achieved 31.660604846656092 GB/s) +Achieved 41.35261829917973 GFLOPs +Completed in 0.000620071 s (achieved 31.6092899038981 GB/s) +Achieved 41.2855947141537 GFLOPs +Durations: [0.000618995, 0.0006198940000000001, 0.000619555, 0.000619039, 0.000620057, 0.0006202600000000001, 0.000618894, 0.000617529, 0.000619066, 0.000620071] +Median duration 0.000619555 (31.635615885595307 GB/s) 6.937635062630549% of peak. +Median achieved 41.31997966282251 GFLOPs +./sycl_spmm 100000 100000 32 16 j 128 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 32 columns. +Using method j with WG size 128 +OK! +Completed in 0.000662192 s (achieved 58.5932841230338 GB/s) +Achieved 154.637929784715 GFLOPs +Completed in 0.000662579 s (achieved 58.55906088179673 GB/s) +Achieved 154.5476086625142 GFLOPs +Completed in 0.0006626480000000001 s (achieved 58.552963262546626 GB/s) +Achieved 154.5315159783173 GFLOPs +Completed in 0.0006619130000000001 s (achieved 58.61798151720845 GB/s) +Achieved 154.70311052963152 GFLOPs +Completed in 0.000662901 s (achieved 58.530616185523925 GB/s) +Achieved 154.47253813163655 GFLOPs +Completed in 0.0006610730000000001 s (achieved 58.69246512866203 GB/s) +Achieved 154.89968581382084 GFLOPs +Completed in 0.000662116 s (achieved 58.600009665979975 GB/s) +Achieved 154.65567966942348 GFLOPs +Completed in 0.000663052 s (achieved 58.517286728642695 GB/s) +Achieved 154.43735936246327 GFLOPs +Completed in 0.0006618370000000001 s (achieved 58.624712731382495 GB/s) +Achieved 154.72087538170274 GFLOPs +Completed in 0.000662424 s (achieved 58.57276306414019 GB/s) +Achieved 154.5837711194039 GFLOPs +Durations: [0.000662192, 0.000662579, 0.0006626480000000001, 0.0006619130000000001, 0.000662901, 0.0006610730000000001, 0.000662116, 0.000663052, 0.0006618370000000001, 0.000662424] +Median duration 0.000662424 (58.57276306414019 GB/s) 12.844904180732497% of peak. +Median achieved 154.5837711194039 GFLOPs +./sycl_spmm 100000 100000 64 16 j 128 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 64 columns. +Using method j with WG size 128 +OK! +Completed in 0.000907951 s (achieved 70.92894220062537 GB/s) +Achieved 225.56283323659537 GFLOPs +Completed in 0.000908075 s (achieved 70.91925666932798 GB/s) +Achieved 225.5320320458112 GFLOPs +Completed in 0.0009081820000000001 s (achieved 70.91090111893872 GB/s) +Achieved 225.50546035926718 GFLOPs +Completed in 0.0009092270000000001 s (achieved 70.82940123863459 GB/s) +Achieved 225.2462806317894 GFLOPs +Completed in 0.00090795 s (achieved 70.92902032050225 GB/s) +Achieved 225.5630816674927 GFLOPs +Completed in 0.0009084710000000001 s (achieved 70.88834316120163 GB/s) +Achieved 225.43372325588817 GFLOPs +Completed in 0.0009087790000000001 s (achieved 70.86431794748779 GB/s) +Achieved 225.35732009652511 GFLOPs +Completed in 0.0009092970000000001 s (achieved 70.82394861084992 GB/s) +Achieved 225.22894059916615 GFLOPs +Completed in 0.0009084920000000001 s (achieved 70.88670456096477 GB/s) +Achieved 225.4285123039058 GFLOPs +Completed in 0.0009100590000000001 s (achieved 70.76464712727417 GB/s) +Achieved 225.04035452646477 GFLOPs +Durations: [0.000907951, 0.000908075, 0.0009081820000000001, 0.0009092270000000001, 0.00090795, 0.0009084710000000001, 0.0009087790000000001, 0.0009092970000000001, 0.0009084920000000001, 0.0009100590000000001] +Median duration 0.0009084920000000001 (70.88670456096477 GB/s) 15.545329947579994% of peak. +Median achieved 225.4285123039058 GFLOPs +./sycl_spmm 100000 100000 128 16 j 128 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 128 columns. +Using method j with WG size 128 +OK! +Completed in 0.0014444190000000002 s (achieved 80.03218179766397 GB/s) +Achieved 283.5742260382894 GFLOPs +Completed in 0.0014427580000000001 s (achieved 80.12432022556797 GB/s) +Achieved 283.9006957507773 GFLOPs +Completed in 0.001444168 s (achieved 80.04609159045208 GB/s) +Achieved 283.6235119459786 GFLOPs +Completed in 0.0014442320000000002 s (achieved 80.04254441114723 GB/s) +Achieved 283.6109433941361 GFLOPs +Completed in 0.0014535650000000002 s (achieved 79.5286100036806 GB/s) +Achieved 281.7899440341505 GFLOPs +Completed in 0.0014449360000000002 s (achieved 80.0035461778238 GB/s) +Achieved 283.47276280748764 GFLOPs +Completed in 0.0014438760000000002 s (achieved 80.06227958633566 GB/s) +Achieved 283.68087010241874 GFLOPs +Completed in 0.001444512 s (achieved 80.02702919740369 GB/s) +Achieved 283.55596907467714 GFLOPs +Completed in 0.001444313 s (achieved 80.03805546304714 GB/s) +Achieved 283.5950379176813 GFLOPs +Completed in 0.0014427950000000002 s (achieved 80.12226546390859 GB/s) +Achieved 283.8934152114472 GFLOPs +Durations: [0.0014444190000000002, 0.0014427580000000001, 0.001444168, 0.0014442320000000002, 0.0014535650000000002, 0.0014449360000000002, 0.0014438760000000002, 0.001444512, 0.001444313, 0.0014427950000000002] +Median duration 0.001444313 (80.03805546304714 GB/s) 17.552205145405075% of peak. +Median achieved 283.5950379176813 GFLOPs +./sycl_spmm 100000 100000 256 16 j 128 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 256 columns. +Using method j with WG size 128 +OK! +Completed in 0.0026061590000000003 s (achieved 83.64800612702449 GB/s) +Achieved 314.33231817398706 GFLOPs +Completed in 0.0026046430000000002 s (achieved 83.69669240659853 GB/s) +Achieved 314.51527138268085 GFLOPs +Completed in 0.002605268 s (achieved 83.67661369195032 GB/s) +Achieved 314.4398196270019 GFLOPs +Completed in 0.0026071040000000003 s (achieved 83.61768613756874 GB/s) +Achieved 314.21838177533385 GFLOPs +Completed in 0.0026068890000000003 s (achieved 83.62458240454427 GB/s) +Achieved 314.2442965542453 GFLOPs +Completed in 0.0026057520000000002 s (achieved 83.66107135291463 GB/s) +Achieved 314.3814146549633 GFLOPs +Completed in 0.0026062900000000003 s (achieved 83.64380172582483 GB/s) +Achieved 314.3165188831634 GFLOPs +Completed in 0.002606391 s (achieved 83.64056045313232 GB/s) +Achieved 314.30433883481027 GFLOPs +Completed in 0.002605117 s (achieved 83.68146382676863 GB/s) +Achieved 314.4580454543884 GFLOPs +Completed in 0.002605605 s (achieved 83.66579124617893 GB/s) +Achieved 314.3991510608861 GFLOPs +Durations: [0.0026061590000000003, 0.0026046430000000002, 0.002605268, 0.0026071040000000003, 0.0026068890000000003, 0.0026057520000000002, 0.0026062900000000003, 0.002606391, 0.002605117, 0.002605605] +Median duration 0.0026061590000000003 (83.64800612702449 GB/s) 18.343860992768526% of peak. +Median achieved 314.33231817398706 GFLOPs +./sycl_spmm 100000 100000 512 16 j 128 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 512 columns. +Using method j with WG size 128 +OK! +Completed in 0.004955785 s (achieved 85.31443636073801 GB/s) +Achieved 330.60352698916523 GFLOPs +Completed in 0.004955999 s (achieved 85.31075248400978 GB/s) +Achieved 330.589251531326 GFLOPs +Completed in 0.004956259 s (achieved 85.30627717397336 GB/s) +Achieved 330.57190917585217 GFLOPs +Completed in 0.00495862 s (achieved 85.26565939717098 GB/s) +Achieved 330.4145104888053 GFLOPs +Completed in 0.004954396000000001 s (achieved 85.33835486707157 GB/s) +Achieved 330.6962140289149 GFLOPs +Completed in 0.004955462 s (achieved 85.31999720712217 GB/s) +Achieved 330.6250759263213 GFLOPs +Completed in 0.004955868 s (achieved 85.31300752965979 GB/s) +Achieved 330.597990099817 GFLOPs +Completed in 0.004954371 s (achieved 85.33878548861198 GB/s) +Achieved 330.6978827382931 GFLOPs +Completed in 0.004956646 s (achieved 85.29961671662652 GB/s) +Achieved 330.5460991162169 GFLOPs +Completed in 0.004952435000000001 s (achieved 85.37214602513711 GB/s) +Achieved 330.8271587612962 GFLOPs +Durations: [0.004955785, 0.004955999, 0.004956259, 0.00495862, 0.004954396000000001, 0.004955462, 0.004955868, 0.004954371, 0.004956646, 0.004952435000000001] +Median duration 0.004955868 (85.31300752965979 GB/s) 18.708992879311356% of peak. +Median achieved 330.597990099817 GFLOPs +./sycl_spmm 100000 100000 1 32 j 128 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 1 columns. +Using method j with WG size 128 +OK! +Completed in 0.002954304 s (achieved 9.071511936483178 GB/s) +Achieved 2.1663308853794327 GFLOPs +Completed in 0.0029558640000000003 s (achieved 9.066724314785795 GB/s) +Achieved 2.1651875729059253 GFLOPs +Completed in 0.002959925 s (achieved 9.054284821405949 GB/s) +Achieved 2.1622169480645628 GFLOPs +Completed in 0.002964145 s (achieved 9.04139439872206 GB/s) +Achieved 2.159138638629352 GFLOPs +Completed in 0.0029610820000000003 s (achieved 9.050746990458217 GB/s) +Achieved 2.1613720930389633 GFLOPs +Completed in 0.0029562860000000002 s (achieved 9.065430070027054 GB/s) +Achieved 2.164878499576834 GFLOPs +Completed in 0.0029617700000000003 s (achieved 9.048644560516177 GB/s) +Achieved 2.1608700202919198 GFLOPs +Completed in 0.0029600380000000003 s (achieved 9.053939172402517 GB/s) +Achieved 2.1621344050312867 GFLOPs +Completed in 0.0029543810000000003 s (achieved 9.071275505765843 GB/s) +Achieved 2.1662744243210335 GFLOPs +Completed in 0.002954505 s (achieved 9.07089478609784 GB/s) +Achieved 2.166183506204931 GFLOPs +Durations: [0.002954304, 0.0029558640000000003, 0.002959925, 0.002964145, 0.0029610820000000003, 0.0029562860000000002, 0.0029617700000000003, 0.0029600380000000003, 0.0029543810000000003, 0.002954505] +Median duration 0.002959925 (9.054284821405949 GB/s) 1.9855887766241114% of peak. +Median achieved 2.1622169480645628 GFLOPs +./sycl_spmm 100000 100000 8 32 j 128 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 8 columns. +Using method j with WG size 128 +OK! +Completed in 0.00114579 s (achieved 28.277436528508716 GB/s) +Achieved 44.68532628143028 GFLOPs +Completed in 0.001144298 s (achieved 28.314306238409927 GB/s) +Achieved 44.74358951951328 GFLOPs +Completed in 0.001144491 s (achieved 28.309531486049256 GB/s) +Achieved 44.73604423276373 GFLOPs +Completed in 0.001146766 s (achieved 28.25336991155999 GB/s) +Achieved 44.647295088971944 GFLOPs +Completed in 0.001146012 s (achieved 28.27195875784896 GB/s) +Achieved 44.67667005232057 GFLOPs +Completed in 0.001146251 s (achieved 28.2660638900206 GB/s) +Achieved 44.66735470677888 GFLOPs +Completed in 0.001144559 s (achieved 28.30784957350386 GB/s) +Achieved 44.73338639598308 GFLOPs +Completed in 0.0011446140000000002 s (achieved 28.306489349247865 GB/s) +Achieved 44.731236906066144 GFLOPs +Completed in 0.001145978 s (achieved 28.27279755806831 GB/s) +Achieved 44.677995563614665 GFLOPs +Completed in 0.001145083 s (achieved 28.294895653852166 GB/s) +Achieved 44.71291600696194 GFLOPs +Durations: [0.00114579, 0.001144298, 0.001144491, 0.001146766, 0.001146012, 0.001146251, 0.001144559, 0.0011446140000000002, 0.001145978, 0.001145083] +Median duration 0.00114579 (28.277436528508716 GB/s) 6.201192221164192% of peak. +Median achieved 44.68532628143028 GFLOPs +./sycl_spmm 100000 100000 32 32 j 128 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 32 columns. +Using method j with WG size 128 +OK! +Completed in 0.0012292540000000002 s (achieved 41.97668179237163 GB/s) +Achieved 166.605111718164 GFLOPs +Completed in 0.0012283040000000002 s (achieved 42.00914757258789 GB/s) +Achieved 166.73396813818076 GFLOPs +Completed in 0.00122828 s (achieved 42.00996841111148 GB/s) +Achieved 166.73722603966525 GFLOPs +Completed in 0.001229878 s (achieved 41.95538419257846 GB/s) +Achieved 166.52058171623528 GFLOPs +Completed in 0.00122819 s (achieved 42.01304684128677 GB/s) +Achieved 166.74944430422002 GFLOPs +Completed in 0.001228621 s (achieved 41.99830867289425 GB/s) +Achieved 166.690948632654 GFLOPs +Completed in 0.0012306950000000002 s (achieved 41.927532004274006 GB/s) +Achieved 166.41003660533275 GFLOPs +Completed in 0.0012300640000000001 s (achieved 41.94904004994862 GB/s) +Achieved 166.4954018652688 GFLOPs +Completed in 0.001230009 s (achieved 41.950915806307115 GB/s) +Achieved 166.50284672713778 GFLOPs +Completed in 0.0012303610000000001 s (achieved 41.9389138634921 GB/s) +Achieved 166.45521111283597 GFLOPs +Durations: [0.0012292540000000002, 0.0012283040000000002, 0.00122828, 0.001229878, 0.00122819, 0.001228621, 0.0012306950000000002, 0.0012300640000000001, 0.001230009, 0.0012303610000000001] +Median duration 0.001229878 (41.95538419257846 GB/s) 9.200742147495275% of peak. +Median achieved 166.52058171623528 GFLOPs +./sycl_spmm 100000 100000 64 32 j 128 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 64 columns. +Using method j with WG size 128 +OK! +Completed in 0.0017225040000000001 s (achieved 44.8184758932345 GB/s) +Achieved 237.79335200382698 GFLOPs +Completed in 0.0017204850000000001 s (achieved 44.87107065740184 GB/s) +Achieved 238.0724040023598 GFLOPs +Completed in 0.0017221810000000001 s (achieved 44.826881727298115 GB/s) +Achieved 237.83795083095214 GFLOPs +Completed in 0.001721288 s (achieved 44.85013780378414 GB/s) +Achieved 237.961340577521 GFLOPs +Completed in 0.001721943 s (achieved 44.833077517664634 GB/s) +Achieved 237.87082383098627 GFLOPs +Completed in 0.00172112 s (achieved 44.85451566421865 GB/s) +Achieved 237.98456818815654 GFLOPs +Completed in 0.0017221720000000001 s (achieved 44.827115990737276 GB/s) +Achieved 237.8391937622955 GFLOPs +Completed in 0.001721387 s (achieved 44.84755839331888 GB/s) +Achieved 237.94765500146104 GFLOPs +Completed in 0.001721522 s (achieved 44.84404149351562 GB/s) +Achieved 237.92899538896395 GFLOPs +Completed in 0.0017228310000000002 s (achieved 44.80996917283239 GB/s) +Achieved 237.74821790413569 GFLOPs +Durations: [0.0017225040000000001, 0.0017204850000000001, 0.0017221810000000001, 0.001721288, 0.001721943, 0.00172112, 0.0017221720000000001, 0.001721387, 0.001721522, 0.0017228310000000002] +Median duration 0.001721943 (44.833077517664634 GB/s) 9.831815245101893% of peak. +Median achieved 237.87082383098627 GFLOPs +./sycl_spmm 100000 100000 128 32 j 128 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 128 columns. +Using method j with WG size 128 +OK! +Completed in 0.0027920500000000003 s (achieved 45.987716552354 GB/s) +Achieved 293.40448774198165 GFLOPs +Completed in 0.002791806 s (achieved 45.99173581545423 GB/s) +Achieved 293.43013089018365 GFLOPs +Completed in 0.0027906740000000004 s (achieved 46.01039175482339 GB/s) +Achieved 293.5491569420147 GFLOPs +Completed in 0.002792674 s (achieved 45.977440975924864 GB/s) +Achieved 293.33892892618326 GFLOPs +Completed in 0.0027923600000000002 s (achieved 45.98261112464009 GB/s) +Achieved 293.37191479608646 GFLOPs +Completed in 0.0027919620000000003 s (achieved 45.98916604165816 GB/s) +Achieved 293.41373557376494 GFLOPs +Completed in 0.002792225 s (achieved 45.98483431671875 GB/s) +Achieved 293.38609889962305 GFLOPs +Completed in 0.002791345 s (achieved 45.99933150506297 GB/s) +Achieved 293.4785918616294 GFLOPs +Completed in 0.002791477 s (achieved 45.99715634411461 GB/s) +Achieved 293.46471419968714 GFLOPs +Completed in 0.002792624 s (achieved 45.97826417018546 GB/s) +Achieved 293.3441809566916 GFLOPs +Durations: [0.0027920500000000003, 0.002791806, 0.0027906740000000004, 0.002792674, 0.0027923600000000002, 0.0027919620000000003, 0.002792225, 0.002791345, 0.002791477, 0.002792624] +Median duration 0.0027920500000000003 (45.987716552354 GB/s) 10.085025559726754% of peak. +Median achieved 293.40448774198165 GFLOPs +./sycl_spmm 100000 100000 256 32 j 128 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 256 columns. +Using method j with WG size 128 +OK! +Completed in 0.004941497 s (achieved 46.70649481321146 GB/s) +Achieved 331.55944443556274 GFLOPs +Completed in 0.004939613000000001 s (achieved 46.724308969143934 GB/s) +Achieved 331.685903328864 GFLOPs +Completed in 0.004941234 s (achieved 46.70898079305696 GB/s) +Achieved 331.5770918762398 GFLOPs +Completed in 0.004938976 s (achieved 46.73033519498779 GB/s) +Achieved 331.728682220768 GFLOPs +Completed in 0.0049545200000000005 s (achieved 46.58372637510798 GB/s) +Achieved 330.6879374793118 GFLOPs +Completed in 0.004939712000000001 s (achieved 46.72337253669849 GB/s) +Achieved 331.67925579466976 GFLOPs +Completed in 0.0049401210000000004 s (achieved 46.719504238863784 GB/s) +Achieved 331.6517955734283 GFLOPs +Completed in 0.004939527 s (achieved 46.72512246617946 GB/s) +Achieved 331.6916781708046 GFLOPs +Completed in 0.004941306 s (achieved 46.7083001943211 GB/s) +Achieved 331.57226045098196 GFLOPs +Completed in 0.004941801 s (achieved 46.703621614872795 GB/s) +Achieved 331.53904821339427 GFLOPs +Durations: [0.004941497, 0.004939613000000001, 0.004941234, 0.004938976, 0.0049545200000000005, 0.004939712000000001, 0.0049401210000000004, 0.004939527, 0.004941306, 0.004941801] +Median duration 0.004941234 (46.70898079305696 GB/s) 10.243197542337052% of peak. +Median achieved 331.5770918762398 GFLOPs +./sycl_spmm 100000 100000 512 32 j 128 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 512 columns. +Using method j with WG size 128 +OK! +Completed in 0.009596025000000001 s (achieved 45.393796285441105 GB/s) +Achieved 341.4747252117413 GFLOPs +Completed in 0.009595831 s (achieved 45.39471401695174 GB/s) +Achieved 341.4816288448598 GFLOPs +Completed in 0.009593648 s (achieved 45.4050434203965 GB/s) +Achieved 341.55933175784645 GFLOPs +Completed in 0.00959741 s (achieved 45.387245517280185 GB/s) +Achieved 341.42544707374174 GFLOPs +Completed in 0.009596815 s (achieved 45.39005951453686 GB/s) +Achieved 341.44661536145065 GFLOPs +Completed in 0.009599117 s (achieved 45.37917435530789 GB/s) +Achieved 341.3647317768916 GFLOPs +Completed in 0.009597334 s (achieved 45.387604932786545 GB/s) +Achieved 341.4281507760384 GFLOPs +Completed in 0.009592409000000001 s (achieved 45.4109081462227 GB/s) +Achieved 341.6034491440054 GFLOPs +Completed in 0.009597466 s (achieved 45.38698068844422 GB/s) +Achieved 341.4234548994495 GFLOPs +Completed in 0.009600492 s (achieved 45.37267506706948 GB/s) +Achieved 341.3158408964874 GFLOPs +Durations: [0.009596025000000001, 0.009595831, 0.009593648, 0.00959741, 0.009596815, 0.009599117, 0.009597334, 0.009592409000000001, 0.009597466, 0.009600492] +Median duration 0.009597334 (45.387604932786545 GB/s) 9.953422134383013% of peak. +Median achieved 341.4281507760384 GFLOPs +./sycl_spmm 100000 100000 1 64 j 128 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 1 columns. +Using method j with WG size 128 +OK! +Completed in 0.005969234 s (achieved 8.778346434400126 GB/s) +Achieved 2.144328736316921 GFLOPs +Completed in 0.005967712 s (achieved 8.78058525612496 GB/s) +Achieved 2.144875624024752 GFLOPs +Completed in 0.00596319 s (achieved 8.787243740347028 GB/s) +Achieved 2.1465021238632342 GFLOPs +Completed in 0.0059671820000000006 s (achieved 8.781365140195152 GB/s) +Achieved 2.1450661300426233 GFLOPs +Completed in 0.0059617310000000005 s (achieved 8.789394221242118 GB/s) +Achieved 2.1470274321333855 GFLOPs +Completed in 0.005978272000000001 s (achieved 8.765075259205334 GB/s) +Achieved 2.1410869227763474 GFLOPs +Completed in 0.005975799 s (achieved 8.76870256178295 GB/s) +Achieved 2.1419729813536232 GFLOPs +Completed in 0.0059701680000000005 s (achieved 8.776973110304432 GB/s) +Achieved 2.1439932678611386 GFLOPs +Completed in 0.005957383 s (achieved 8.795809166541751 GB/s) +Achieved 2.148594441552608 GFLOPs +Completed in 0.005978317 s (achieved 8.765009282712843 GB/s) +Achieved 2.1410708063824653 GFLOPs +Durations: [0.005969234, 0.005967712, 0.00596319, 0.0059671820000000006, 0.0059617310000000005, 0.005978272000000001, 0.005975799, 0.0059701680000000005, 0.005957383, 0.005978317] +Median duration 0.005969234 (8.778346434400126 GB/s) 1.925075972456168% of peak. +Median achieved 2.144328736316921 GFLOPs +./sycl_spmm 100000 100000 8 64 j 128 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 8 columns. +Using method j with WG size 128 +OK! +Completed in 0.0022100780000000003 s (achieved 26.243419463023475 GB/s) +Achieved 46.333206339323766 GFLOPs +Completed in 0.0022107940000000003 s (achieved 26.234920123720254 GB/s) +Achieved 46.318200610278474 GFLOPs +Completed in 0.002212342 s (achieved 26.216563261918818 GB/s) +Achieved 46.285791256505554 GFLOPs +Completed in 0.002212248 s (achieved 26.217677222445225 GB/s) +Achieved 46.2877579728855 GFLOPs +Completed in 0.002211293 s (achieved 26.228999956134263 GB/s) +Achieved 46.307748453054394 GFLOPs +Completed in 0.002211563 s (achieved 26.22579777288732 GB/s) +Achieved 46.30209494371175 GFLOPs +Completed in 0.002210464 s (achieved 26.238836732921232 GB/s) +Achieved 46.32511545087366 GFLOPs +Completed in 0.002211493 s (achieved 26.226627893463824 GB/s) +Achieved 46.303560535800926 GFLOPs +Completed in 0.0022117250000000003 s (achieved 26.22387683821451 GB/s) +Achieved 46.29870350066125 GFLOPs +Completed in 0.0022108790000000002 s (achieved 26.23391148950259 GB/s) +Achieved 46.316419849299756 GFLOPs +Durations: [0.0022100780000000003, 0.0022107940000000003, 0.002212342, 0.002212248, 0.002211293, 0.002211563, 0.002210464, 0.002211493, 0.0022117250000000003, 0.0022108790000000002] +Median duration 0.002211493 (26.226627893463824 GB/s) 5.751453485408733% of peak. +Median achieved 46.303560535800926 GFLOPs +./sycl_spmm 100000 100000 32 64 j 128 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 32 columns. +Using method j with WG size 128 +OK! +Completed in 0.0023847630000000002 s (achieved 32.37219128273962 GB/s) +Achieved 171.75710961634343 GFLOPs +Completed in 0.0023872060000000002 s (achieved 32.339062485600316 GB/s) +Achieved 171.5813381836339 GFLOPs +Completed in 0.0023874540000000002 s (achieved 32.33570322192595 GB/s) +Achieved 171.56351494102086 GFLOPs +Completed in 0.002386968 s (achieved 32.34228695147987 GB/s) +Achieved 171.5984462296939 GFLOPs +Completed in 0.0023851370000000003 s (achieved 32.36711518038586 GB/s) +Achieved 171.73017734411061 GFLOPs +Completed in 0.002385986 s (achieved 32.355598063023 GB/s) +Achieved 171.66907098365203 GFLOPs +Completed in 0.0023874060000000003 s (achieved 32.336353347524465 GB/s) +Achieved 171.56696431189332 GFLOPs +Completed in 0.0023866020000000003 s (achieved 32.347246838810996 GB/s) +Achieved 171.62476189997327 GFLOPs +Completed in 0.0023874530000000003 s (achieved 32.335716765942614 GB/s) +Achieved 171.56358680149933 GFLOPs +Completed in 0.002385471 s (achieved 32.36258332211961 GB/s) +Achieved 171.70613266730135 GFLOPs +Durations: [0.0023847630000000002, 0.0023872060000000002, 0.0023874540000000002, 0.002386968, 0.0023851370000000003, 0.002385986, 0.0023874060000000003, 0.0023866020000000003, 0.0023874530000000003, 0.002385471] +Median duration 0.002386968 (32.34228695147987 GB/s) 7.092606787605234% of peak. +Median achieved 171.5984462296939 GFLOPs +./sycl_spmm 100000 100000 64 64 j 128 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 64 columns. +Using method j with WG size 128 +OK! +Completed in 0.0033843750000000002 s (achieved 30.374885761772852 GB/s) +Achieved 242.05355493998152 GFLOPs +Completed in 0.0033866390000000003 s (achieved 30.354579865170155 GB/s) +Achieved 241.8917398636229 GFLOPs +Completed in 0.0033858160000000003 s (achieved 30.36195823990435 GB/s) +Achieved 241.9505371821741 GFLOPs +Completed in 0.003385228 s (achieved 30.367231985556067 GB/s) +Achieved 241.99256298246382 GFLOPs +Completed in 0.0033866900000000004 s (achieved 30.354122757028247 GB/s) +Achieved 241.88809722767655 GFLOPs +Completed in 0.003386684 s (achieved 30.354176533742148 GB/s) +Achieved 241.88852576738782 GFLOPs +Completed in 0.003384447 s (achieved 30.37423957296421 GB/s) +Achieved 242.0484055445395 GFLOPs +Completed in 0.003386729 s (achieved 30.353773213032397 GB/s) +Achieved 241.8853117565651 GFLOPs +Completed in 0.003384554 s (achieved 30.373279315383947 GB/s) +Achieved 242.04075337548167 GFLOPs +Completed in 0.0033864480000000002 s (achieved 30.3562919023118 GB/s) +Achieved 241.90538286724023 GFLOPs +Durations: [0.0033843750000000002, 0.0033866390000000003, 0.0033858160000000003, 0.003385228, 0.0033866900000000004, 0.003386684, 0.003384447, 0.003386729, 0.003384554, 0.0033864480000000002] +Median duration 0.0033864480000000002 (30.3562919023118 GB/s) 6.657081557524517% of peak. +Median achieved 241.90538286724023 GFLOPs +./sycl_spmm 100000 100000 128 64 j 128 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 128 columns. +Using method j with WG size 128 +OK! +Completed in 0.005524788 s (achieved 27.87437346012191 GB/s) +Achieved 296.5543655249758 GFLOPs +Completed in 0.005525321000000001 s (achieved 27.871684559141446 GB/s) +Achieved 296.5257584129501 GFLOPs +Completed in 0.005522760000000001 s (achieved 27.884609144703006 GB/s) +Achieved 296.66326257161273 GFLOPs +Completed in 0.005525793 s (achieved 27.869303826618186 GB/s) +Achieved 296.5004298930488 GFLOPs +Completed in 0.0055270200000000005 s (achieved 27.863116833302573 GB/s) +Achieved 296.4346067139254 GFLOPs +Completed in 0.005529153 s (achieved 27.852367984752817 GB/s) +Achieved 296.32025013596115 GFLOPs +Completed in 0.00552372 s (achieved 27.879762913398938 GB/s) +Achieved 296.6117037069221 GFLOPs +Completed in 0.005523906 s (achieved 27.878824150881638 GB/s) +Achieved 296.60171624933514 GFLOPs +Completed in 0.005525389 s (achieved 27.871341547174325 GB/s) +Achieved 296.52210912209074 GFLOPs +Completed in 0.005525306000000001 s (achieved 27.871760224682575 GB/s) +Achieved 296.52656341567325 GFLOPs +Durations: [0.005524788, 0.005525321000000001, 0.005522760000000001, 0.005525793, 0.0055270200000000005, 0.005529153, 0.00552372, 0.005523906, 0.005525389, 0.005525306000000001] +Median duration 0.005525321000000001 (27.871684559141446 GB/s) 6.11221152612751% of peak. +Median achieved 296.5257584129501 GFLOPs +./sycl_spmm 100000 100000 256 64 j 128 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 256 columns. +Using method j with WG size 128 +OK! +Completed in 0.009622473000000001 s (achieved 26.64595722949807 GB/s) +Achieved 340.5361594675298 GFLOPs +Completed in 0.009632912 s (achieved 26.617081522181454 GB/s) +Achieved 340.16712703282246 GFLOPs +Completed in 0.009630725000000001 s (achieved 26.6231258809695 GB/s) +Achieved 340.2443741255201 GFLOPs +Completed in 0.009630020000000001 s (achieved 26.625074921962778 GB/s) +Achieved 340.26928292983814 GFLOPs +Completed in 0.009628287000000001 s (achieved 26.62986718198159 GB/s) +Achieved 340.3305281614476 GFLOPs +Completed in 0.009636694000000001 s (achieved 26.60663542912123 GB/s) +Achieved 340.03362563966436 GFLOPs +Completed in 0.009628855 s (achieved 26.628296303143003 GB/s) +Achieved 340.31045228119024 GFLOPs +Completed in 0.009627329 s (achieved 26.632517077166472 GB/s) +Achieved 340.36439390406207 GFLOPs +Completed in 0.009624807 s (achieved 26.63949562832792 GB/s) +Achieved 340.4535800042536 GFLOPs +Completed in 0.009627811 s (achieved 26.631183765447826 GB/s) +Achieved 340.34735413896266 GFLOPs +Durations: [0.009622473000000001, 0.009632912, 0.009630725000000001, 0.009630020000000001, 0.009628287000000001, 0.009636694000000001, 0.009628855, 0.009627329, 0.009624807, 0.009627811] +Median duration 0.009628855 (26.628296303143003 GB/s) 5.839538662969956% of peak. +Median achieved 340.31045228119024 GFLOPs +./sycl_spmm 100000 100000 512 64 j 128 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 512 columns. +Using method j with WG size 128 +OK! +Completed in 0.018885353 s (achieved 24.421042275460778 GB/s) +Achieved 347.0202542679504 GFLOPs +Completed in 0.018882984000000002 s (achieved 24.424106062897685 GB/s) +Achieved 347.06379034161125 GFLOPs +Completed in 0.018894811 s (achieved 24.40881806121268 GB/s) +Achieved 346.8465495632637 GFLOPs +Completed in 0.018884677000000002 s (achieved 24.42191645639478 GB/s) +Achieved 347.0326762803515 GFLOPs +Completed in 0.018895965 s (achieved 24.407327384444248 GB/s) +Achieved 346.8253672146408 GFLOPs +Completed in 0.018888304 s (achieved 24.417226872248563 GB/s) +Achieved 346.96603781895925 GFLOPs +Completed in 0.018890800000000003 s (achieved 24.414000677578503 GB/s) +Achieved 346.9201939568467 GFLOPs +Completed in 0.018885546 s (achieved 24.420792705702027 GB/s) +Achieved 347.0167079098481 GFLOPs +Completed in 0.018885942000000003 s (achieved 24.420280651078986 GB/s) +Achieved 347.0094316714517 GFLOPs +Completed in 0.018879259000000002 s (achieved 24.428925097113186 GB/s) +Achieved 347.1322682738766 GFLOPs +Durations: [0.018885353, 0.018882984000000002, 0.018894811, 0.018884677000000002, 0.018895965, 0.018888304, 0.018890800000000003, 0.018885546, 0.018885942000000003, 0.018879259000000002] +Median duration 0.018885942000000003 (24.420280651078986 GB/s) 5.355324704183988% of peak. +Median achieved 347.0094316714517 GFLOPs +./sycl_spmm 100000 100000 1 128 j 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 1 columns. +Using method j with WG size 128 +OK! +Completed in 0.011788493 s (achieved 8.788231371049719 GB/s) +Achieved 2.1716092124752504 GFLOPs +Completed in 0.011764646 s (achieved 8.806045162769879 GB/s) +Achieved 2.176011075896376 GFLOPs +Completed in 0.011796656 s (achieved 8.78215012796847 GB/s) +Achieved 2.1701065115402196 GFLOPs +Completed in 0.011772835 s (achieved 8.79991981540555 GB/s) +Achieved 2.1744974766061023 GFLOPs +Completed in 0.011740064000000001 s (achieved 8.824483750684834 GB/s) +Achieved 2.1805673291048495 GFLOPs +Completed in 0.011779283000000002 s (achieved 8.79510272399432 GB/s) +Achieved 2.1733071529056565 GFLOPs +Completed in 0.011793077 s (achieved 8.784815362436792 GB/s) +Achieved 2.1707651022714427 GFLOPs +Completed in 0.011788252 s (achieved 8.788411038379566 GB/s) +Achieved 2.1716536090338074 GFLOPs +Completed in 0.011778108 s (achieved 8.795980135349414 GB/s) +Achieved 2.173523964969586 GFLOPs +Completed in 0.011758195 s (achieved 8.81087649932664 GB/s) +Achieved 2.1772049196326475 GFLOPs +Durations: [0.011788493, 0.011764646, 0.011796656, 0.011772835, 0.011740064000000001, 0.011779283000000002, 0.011793077, 0.011788252, 0.011778108, 0.011758195] +Median duration 0.011779283000000002 (8.79510272399432 GB/s) 1.9287505973671755% of peak. +Median achieved 2.1733071529056565 GFLOPs +./sycl_spmm 100000 100000 8 128 j 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 8 columns. +Using method j with WG size 128 +OK! +Completed in 0.0043601510000000005 s (achieved 25.04500509271353 GB/s) +Achieved 46.97085032146822 GFLOPs +Completed in 0.004360271000000001 s (achieved 25.04431582348895 GB/s) +Achieved 46.96955762612002 GFLOPs +Completed in 0.004360452 s (achieved 25.043276247508288 GB/s) +Achieved 46.96760794523137 GFLOPs +Completed in 0.004361928 s (achieved 25.03480204166598 GB/s) +Achieved 46.951714929728325 GFLOPs +Completed in 0.004359158 s (achieved 25.050710251842215 GB/s) +Achieved 46.98155010669492 GFLOPs +Completed in 0.004358762 s (achieved 25.052986146066242 GB/s) +Achieved 46.98581845028473 GFLOPs +Completed in 0.0043580120000000005 s (achieved 25.057297685274843 GB/s) +Achieved 46.99390456015265 GFLOPs +Completed in 0.004359461 s (achieved 25.04896912714668 GB/s) +Achieved 46.97828470079214 GFLOPs +Completed in 0.004361409 s (achieved 25.037781139076845 GB/s) +Achieved 46.95730210122463 GFLOPs +Completed in 0.004359844 s (achieved 25.046768645850634 GB/s) +Achieved 46.974157790966835 GFLOPs +Durations: [0.0043601510000000005, 0.004360271000000001, 0.004360452, 0.004361928, 0.004359158, 0.004358762, 0.0043580120000000005, 0.004359461, 0.004361409, 0.004359844] +Median duration 0.0043601510000000005 (25.04500509271353 GB/s) 5.492325678226651% of peak. +Median achieved 46.97085032146822 GFLOPs +./sycl_spmm 100000 100000 32 128 j 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 32 columns. +Using method j with WG size 128 +OK! +Completed in 0.004715785 s (achieved 27.22770524949717 GB/s) +Achieved 173.71445051036042 GFLOPs +Completed in 0.004716466 s (achieved 27.22377390190028 GB/s) +Achieved 173.68936826852988 GFLOPs +Completed in 0.004716284 s (achieved 27.22482445925648 GB/s) +Achieved 173.69607088970892 GFLOPs +Completed in 0.004715001 s (achieved 27.232232612463925 GB/s) +Achieved 173.74333536726718 GFLOPs +Completed in 0.004714476 s (achieved 27.235265170508875 GB/s) +Achieved 173.76268327593564 GFLOPs +Completed in 0.004714257 s (achieved 27.23653038007898 GB/s) +Achieved 173.7707553915707 GFLOPs +Completed in 0.004714876000000001 s (achieved 27.232954588837543 GB/s) +Achieved 173.7479416213703 GFLOPs +Completed in 0.004716318000000001 s (achieved 27.224628195130183 GB/s) +Achieved 173.69481871239384 GFLOPs +Completed in 0.0047155140000000005 s (achieved 27.229270022313578 GB/s) +Achieved 173.72443385811175 GFLOPs +Completed in 0.004713706000000001 s (achieved 27.239714144242342 GB/s) +Achieved 173.79106800466553 GFLOPs +Durations: [0.004715785, 0.004716466, 0.004716284, 0.004715001, 0.004714476, 0.004714257, 0.004714876000000001, 0.004716318000000001, 0.0047155140000000005, 0.004713706000000001] +Median duration 0.0047155140000000005 (27.229270022313578 GB/s) 5.971331145244205% of peak. +Median achieved 173.72443385811175 GFLOPs +./sycl_spmm 100000 100000 64 128 j 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 64 columns. +Using method j with WG size 128 +OK! +Completed in 0.006716632 s (achieved 22.928158636650036 GB/s) +Achieved 243.9317800945474 GFLOPs +Completed in 0.006715159 s (achieved 22.933188030246193 GB/s) +Achieved 243.98528761567673 GFLOPs +Completed in 0.006717699000000001 s (achieved 22.924516862098166 GB/s) +Achieved 243.89303539798374 GFLOPs +Completed in 0.006715935 s (achieved 22.930538190140314 GB/s) +Achieved 243.95709607076304 GFLOPs +Completed in 0.0067156360000000005 s (achieved 22.931559125598824 GB/s) +Achieved 243.9679577630473 GFLOPs +Completed in 0.006719317000000001 s (achieved 22.918996677787337 GB/s) +Achieved 243.83430637369838 GFLOPs +Completed in 0.006716251 s (achieved 22.929459306985397 GB/s) +Achieved 243.94561787521044 GFLOPs +Completed in 0.006714997 s (achieved 22.933741295789112 GB/s) +Achieved 243.99117378607912 GFLOPs +Completed in 0.0067141200000000005 s (achieved 22.93673690669812 GB/s) +Achieved 244.02304397300017 GFLOPs +Completed in 0.006716206000000001 s (achieved 22.92961293921002 GB/s) +Achieved 243.94725236242007 GFLOPs +Durations: [0.006716632, 0.006715159, 0.006717699000000001, 0.006715935, 0.0067156360000000005, 0.006719317000000001, 0.006716251, 0.006714997, 0.0067141200000000005, 0.006716206000000001] +Median duration 0.006716206000000001 (22.92961293921002 GB/s) 5.028423890177636% of peak. +Median achieved 243.94725236242007 GFLOPs +./sycl_spmm 100000 100000 128 128 j 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 128 columns. +Using method j with WG size 128 +OK! +Completed in 0.01099718 s (achieved 18.659329391716785 GB/s) +Achieved 297.9672970707036 GFLOPs +Completed in 0.010998858 s (achieved 18.656482700294887 GB/s) +Achieved 297.92183879453665 GFLOPs +Completed in 0.010996544 s (achieved 18.660408579277274 GB/s) +Achieved 297.98453041246415 GFLOPs +Completed in 0.010994259000000001 s (achieved 18.664286879179397 GB/s) +Achieved 298.04646224907015 GFLOPs +Completed in 0.010997197 s (achieved 18.65930054722126 GB/s) +Achieved 297.96683645841756 GFLOPs +Completed in 0.011000356000000001 s (achieved 18.653942108782662 GB/s) +Achieved 297.88126856985355 GFLOPs +Completed in 0.010999937000000001 s (achieved 18.654652658465224 GB/s) +Achieved 297.8926152031598 GFLOPs +Completed in 0.010997257 s (achieved 18.659198743832214 GB/s) +Achieved 297.96521077937894 GFLOPs +Completed in 0.010994611000000001 s (achieved 18.663689329254122 GB/s) +Achieved 298.03692008748646 GFLOPs +Completed in 0.010996824 s (achieved 18.659933449876075 GB/s) +Achieved 297.9769431610436 GFLOPs +Durations: [0.01099718, 0.010998858, 0.010996544, 0.010994259000000001, 0.010997197, 0.011000356000000001, 0.010999937000000001, 0.010997257, 0.010994611000000001, 0.010996824] +Median duration 0.010997197 (18.65930054722126 GB/s) 4.091951874390627% of peak. +Median achieved 297.96683645841756 GFLOPs +./sycl_spmm 100000 100000 256 128 j 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 256 columns. +Using method j with WG size 128 +OK! +Completed in 0.019023363 s (achieved 16.169591254711378 GB/s) +Achieved 344.50270438512894 GFLOPs +Completed in 0.019025222 s (achieved 16.16801128522968 GB/s) +Achieved 344.46904220092676 GFLOPs +Completed in 0.019049536000000002 s (achieved 16.147375138166094 GB/s) +Achieved 344.0293768835104 GFLOPs +Completed in 0.019033381000000002 s (achieved 16.161080577328853 GB/s) +Achieved 344.3213793702758 GFLOPs +Completed in 0.019035073 s (achieved 16.15964404234226 GB/s) +Achieved 344.2907731428191 GFLOPs +Completed in 0.01901023 s (achieved 16.18076183191892 GB/s) +Achieved 344.74070013882005 GFLOPs +Completed in 0.019037437 s (achieved 16.157637396252447 GB/s) +Achieved 344.2480203611442 GFLOPs +Completed in 0.019022591000000002 s (achieved 16.170247470494424 GB/s) +Achieved 344.51668545047306 GFLOPs +Completed in 0.019026505000000003 s (achieved 16.1669210398862 GB/s) +Achieved 344.4458138791123 GFLOPs +Completed in 0.019010917000000002 s (achieved 16.180177105607267 GB/s) +Achieved 344.72824219894284 GFLOPs +Durations: [0.019023363, 0.019025222, 0.019049536000000002, 0.019033381000000002, 0.019035073, 0.01901023, 0.019037437, 0.019022591000000002, 0.019026505000000003, 0.019010917000000002] +Median duration 0.019026505000000003 (16.1669210398862 GB/s) 3.5453774210276756% of peak. +Median achieved 344.4458138791123 GFLOPs +./sycl_spmm 100000 100000 512 128 j 128 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 512 columns. +Using method j with WG size 128 +OK! +Completed in 0.037564263 s (achieved 13.640624441373975 GB/s) +Achieved 348.9273834548544 GFLOPs +Completed in 0.037585603 s (achieved 13.632879695983593 GB/s) +Achieved 348.7292727483978 GFLOPs +Completed in 0.037570452000000004 s (achieved 13.638377414250966 GB/s) +Achieved 348.8699044664142 GFLOPs +Completed in 0.037538029 s (achieved 13.65015739105535 GB/s) +Achieved 349.17123645463647 GFLOPs +Completed in 0.037563952000000005 s (achieved 13.6407373750238 GB/s) +Achieved 348.9302722993576 GFLOPs +Completed in 0.037562898000000004 s (achieved 13.641120128697207 GB/s) +Achieved 348.9400631442228 GFLOPs +Completed in 0.037568393000000005 s (achieved 13.639124888839403 GB/s) +Achieved 348.88902487790733 GFLOPs +Completed in 0.037553391000000005 s (achieved 13.644573508687937 GB/s) +Achieved 349.028400657613 GFLOPs +Completed in 0.037602308 s (achieved 13.626823225850925 GB/s) +Achieved 348.57434814905514 GFLOPs +Completed in 0.037585059000000004 s (achieved 13.633077016055768 GB/s) +Achieved 348.73432019888537 GFLOPs +Durations: [0.037564263, 0.037585603, 0.037570452000000004, 0.037538029, 0.037563952000000005, 0.037562898000000004, 0.037568393000000005, 0.037553391000000005, 0.037602308, 0.037585059000000004] +Median duration 0.037568393000000005 (13.639124888839403 GB/s) 2.991036159833202% of peak. +Median achieved 348.88902487790733 GFLOPs +./sycl_spmm 100000 100000 1 16 j 256 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 1 columns. +Using method j with WG size 256 +OK! +Completed in 0.0018280150000000001 s (achieved 7.658582670273494 GB/s) +Achieved 1.7505326816246036 GFLOPs +Completed in 0.001827477 s (achieved 7.6608373183356075 GB/s) +Achieved 1.7510480296058446 GFLOPs +Completed in 0.001826069 s (achieved 7.666744246794618 GB/s) +Achieved 1.7523981842964313 GFLOPs +Completed in 0.001825451 s (achieved 7.669339796028489 GB/s) +Achieved 1.7529914525232395 GFLOPs +Completed in 0.001829263 s (achieved 7.653357663714841 GB/s) +Achieved 1.7493383947524221 GFLOPs +Completed in 0.001827959 s (achieved 7.658817292948037 GB/s) +Achieved 1.750586309649177 GFLOPs +Completed in 0.0018277310000000002 s (achieved 7.659772690839079 GB/s) +Achieved 1.7508046862475932 GFLOPs +Completed in 0.001824777 s (achieved 7.672172544919188 GB/s) +Achieved 1.753638937798975 GFLOPs +Completed in 0.001826347 s (achieved 7.6655772424407855 GB/s) +Achieved 1.7521314405203392 GFLOPs +Completed in 0.0018267700000000001 s (achieved 7.663802230165812 GB/s) +Achieved 1.751725723544836 GFLOPs +Durations: [0.0018280150000000001, 0.001827477, 0.001826069, 0.001825451, 0.001829263, 0.001827959, 0.0018277310000000002, 0.001824777, 0.001826347, 0.0018267700000000001] +Median duration 0.001827477 (7.6608373183356075 GB/s) 1.6800081838455279% of peak. +Median achieved 1.7510480296058446 GFLOPs +./sycl_spmm 100000 100000 8 16 j 256 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 8 columns. +Using method j with WG size 256 +OK! +Completed in 0.001048425 s (achieved 18.694712545007988 GB/s) +Achieved 24.417578749075997 GFLOPs +Completed in 0.001047689 s (achieved 18.70784555340373 GB/s) +Achieved 24.434732062663635 GFLOPs +Completed in 0.00104834 s (achieved 18.69622832287235 GB/s) +Achieved 24.419558540168268 GFLOPs +Completed in 0.001049616 s (achieved 18.673499641773756 GB/s) +Achieved 24.38987210560815 GFLOPs +Completed in 0.001047689 s (achieved 18.70784555340373 GB/s) +Achieved 24.434732062663635 GFLOPs +Completed in 0.0010490570000000002 s (achieved 18.68344999366097 GB/s) +Achieved 24.402868480930966 GFLOPs +Completed in 0.001048606 s (achieved 18.69148564856581 GB/s) +Achieved 24.41336402805248 GFLOPs +Completed in 0.001050041 s (achieved 18.665941615613104 GB/s) +Achieved 24.380000399984382 GFLOPs +Completed in 0.0010498390000000001 s (achieved 18.66953313793829 GB/s) +Achieved 24.38469136696198 GFLOPs +Completed in 0.001047701 s (achieved 18.707631280298482 GB/s) +Achieved 24.434452195807772 GFLOPs +Durations: [0.001048425, 0.001047689, 0.00104834, 0.001049616, 0.001047689, 0.0010490570000000002, 0.001048606, 0.001050041, 0.0010498390000000001, 0.001047701] +Median duration 0.001048606 (18.69148564856581 GB/s) 4.099010010650397% of peak. +Median achieved 24.41336402805248 GFLOPs +./sycl_spmm 100000 100000 32 16 j 256 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 32 columns. +Using method j with WG size 256 +OK! +Completed in 0.0010886420000000001 s (achieved 35.64073772645185 GB/s) +Achieved 94.06214347783752 GFLOPs +Completed in 0.001089761 s (achieved 35.60414072443407 GB/s) +Achieved 93.96555758556234 GFLOPs +Completed in 0.0010879140000000002 s (achieved 35.66458745819981 GB/s) +Achieved 94.12508709328127 GFLOPs +Completed in 0.00108828 s (achieved 35.65259308266255 GB/s) +Achieved 94.09343183739479 GFLOPs +Completed in 0.0010879540000000001 s (achieved 35.66327620469247 GB/s) +Achieved 94.12162646582483 GFLOPs +Completed in 0.0010880150000000001 s (achieved 35.66127672872157 GB/s) +Achieved 94.116349498858 GFLOPs +Completed in 0.001087971 s (achieved 35.66271895114851 GB/s) +Achieved 94.12015577621095 GFLOPs +Completed in 0.001089104 s (achieved 35.62561885733594 GB/s) +Achieved 94.02224213665545 GFLOPs +Completed in 0.001088762 s (achieved 35.63680951392499 GB/s) +Achieved 94.0517762375983 GFLOPs +Completed in 0.0010893790000000001 s (achieved 35.61662561881585 GB/s) +Achieved 93.99850740651324 GFLOPs +Durations: [0.0010886420000000001, 0.001089761, 0.0010879140000000002, 0.00108828, 0.0010879540000000001, 0.0010880150000000001, 0.001087971, 0.001089104, 0.001088762, 0.0010893790000000001] +Median duration 0.0010886420000000001 (35.64073772645185 GB/s) 7.815951255800845% of peak. +Median achieved 94.06214347783752 GFLOPs +./sycl_spmm 100000 100000 64 16 j 256 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 64 columns. +Using method j with WG size 256 +OK! +Completed in 0.0013297270000000002 s (achieved 48.43099673842826 GB/s) +Achieved 154.01657633484166 GFLOPs +Completed in 0.001329384 s (achieved 48.44349262515571 GB/s) +Achieved 154.05631480445078 GFLOPs +Completed in 0.0013301830000000002 s (achieved 48.414394109682654 GB/s) +Achieved 153.96377791627165 GFLOPs +Completed in 0.0013289110000000001 s (achieved 48.4607351432865 GB/s) +Achieved 154.1111481506286 GFLOPs +Completed in 0.0013289270000000001 s (achieved 48.46015168628525 GB/s) +Achieved 154.10929268500075 GFLOPs +Completed in 0.0013308270000000001 s (achieved 48.39096591818471 GB/s) +Achieved 153.88927336160145 GFLOPs +Completed in 0.0013302280000000001 s (achieved 48.4127563094447 GB/s) +Achieved 153.958569508385 GFLOPs +Completed in 0.001330014 s (achieved 48.42054594914039 GB/s) +Achieved 153.9833415287358 GFLOPs +Completed in 0.0013287800000000001 s (achieved 48.46551272595916 GB/s) +Achieved 154.12634145607248 GFLOPs +Completed in 0.001330779 s (achieved 48.392711336743375 GB/s) +Achieved 153.89482400909543 GFLOPs +Durations: [0.0013297270000000002, 0.001329384, 0.0013301830000000002, 0.0013289110000000001, 0.0013289270000000001, 0.0013308270000000001, 0.0013302280000000001, 0.001330014, 0.0013287800000000001, 0.001330779] +Median duration 0.001330014 (48.42054594914039 GB/s) 10.618540778320261% of peak. +Median achieved 153.9833415287358 GFLOPs +./sycl_spmm 100000 100000 128 16 j 256 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 128 columns. +Using method j with WG size 256 +OK! +Completed in 0.001846533 s (achieved 62.60381157553101 GB/s) +Achieved 221.82111015616834 GFLOPs +Completed in 0.001847267 s (achieved 62.578936342174686 GB/s) +Achieved 221.73297092407324 GFLOPs +Completed in 0.001847801 s (achieved 62.560851520266525 GB/s) +Achieved 221.6688918341315 GFLOPs +Completed in 0.001847548 s (achieved 62.56941849413385 GB/s) +Achieved 221.69924678546917 GFLOPs +Completed in 0.0018461270000000001 s (achieved 62.6175793973004 GB/s) +Achieved 221.8698930246944 GFLOPs +Completed in 0.0018472330000000002 s (achieved 62.58008816429762 GB/s) +Achieved 221.7370521206583 GFLOPs +Completed in 0.001848543 s (achieved 62.535739769104644 GB/s) +Achieved 221.57991455973706 GFLOPs +Completed in 0.0018465270000000002 s (achieved 62.6040149968021 GB/s) +Achieved 221.8218309290901 GFLOPs +Completed in 0.001848414 s (achieved 62.540104110875596 GB/s) +Achieved 221.59537852450805 GFLOPs +Completed in 0.0018473360000000002 s (achieved 62.57659895113829 GB/s) +Achieved 221.72468895750418 GFLOPs +Durations: [0.001846533, 0.001847267, 0.001847801, 0.001847548, 0.0018461270000000001, 0.0018472330000000002, 0.001848543, 0.0018465270000000002, 0.001848414, 0.0018473360000000002] +Median duration 0.0018473360000000002 (62.57659895113829 GB/s) 13.722938366477694% of peak. +Median achieved 221.72468895750418 GFLOPs +./sycl_spmm 100000 100000 256 16 j 256 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 256 columns. +Using method j with WG size 256 +OK! +Completed in 0.003174151 s (achieved 68.67978366498633 GB/s) +Achieved 258.08476030283373 GFLOPs +Completed in 0.0031761140000000003 s (achieved 68.63733606539311 GB/s) +Achieved 257.9252507938947 GFLOPs +Completed in 0.0031745220000000004 s (achieved 68.67175719683152 GB/s) +Achieved 258.0545984560825 GFLOPs +Completed in 0.0031747380000000003 s (achieved 68.66708496890138 GB/s) +Achieved 258.0370411668616 GFLOPs +Completed in 0.003175896 s (achieved 68.64204747258727 GB/s) +Achieved 257.9429553108792 GFLOPs +Completed in 0.003176065 s (achieved 68.63839499506466 GB/s) +Achieved 257.92923003779833 GFLOPs +Completed in 0.003175901 s (achieved 68.64193940554193 GB/s) +Achieved 257.94254921674195 GFLOPs +Completed in 0.003174646 s (achieved 68.66907491417942 GB/s) +Achieved 258.044518979439 GFLOPs +Completed in 0.003175667 s (achieved 68.64699730796713 GB/s) +Achieved 257.96155579284607 GFLOPs +Completed in 0.0031761370000000003 s (achieved 68.6368390280394 GB/s) +Achieved 257.9233830278732 GFLOPs +Durations: [0.003174151, 0.0031761140000000003, 0.0031745220000000004, 0.0031747380000000003, 0.003175896, 0.003176065, 0.003175901, 0.003174646, 0.003175667, 0.0031761370000000003] +Median duration 0.003175896 (68.64204747258727 GB/s) 15.053080586093698% of peak. +Median achieved 257.9429553108792 GFLOPs +./sycl_spmm 100000 100000 512 16 j 256 +Multiplying 100000 x 100000 matrix with 16 nnz/row by 512 columns. +Using method j with WG size 256 +OK! +Completed in 0.005752126000000001 s (achieved 73.5032584473984 GB/s) +Achieved 284.8338162272523 GFLOPs +Completed in 0.005749876 s (achieved 73.53202121228354 GB/s) +Achieved 284.94527534159 GFLOPs +Completed in 0.0057525300000000005 s (achieved 73.49809631588188 GB/s) +Achieved 284.81381235734534 GFLOPs +Completed in 0.005751679 s (achieved 73.50897085877011 GB/s) +Achieved 284.8559524966536 GFLOPs +Completed in 0.005752083000000001 s (achieved 73.50380792488563 GB/s) +Achieved 284.835945517476 GFLOPs +Completed in 0.005750043000000001 s (achieved 73.52988560259462 GB/s) +Achieved 284.9369996015682 GFLOPs +Completed in 0.005750435000000001 s (achieved 73.52487316176949 GB/s) +Achieved 284.91757580078723 GFLOPs +Completed in 0.005752396000000001 s (achieved 73.49980842765345 GB/s) +Achieved 284.8204469928704 GFLOPs +Completed in 0.00575345 s (achieved 73.48634367205763 GB/s) +Achieved 284.76826947309877 GFLOPs +Completed in 0.005751132 s (achieved 73.51596242270217 GB/s) +Achieved 284.88304563345093 GFLOPs +Durations: [0.005752126000000001, 0.005749876, 0.0057525300000000005, 0.005751679, 0.005752083000000001, 0.005750043000000001, 0.005750435000000001, 0.005752396000000001, 0.00575345, 0.005751132] +Median duration 0.005752083000000001 (73.50380792488563 GB/s) 16.11925612387843% of peak. +Median achieved 284.835945517476 GFLOPs +./sycl_spmm 100000 100000 1 32 j 256 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 1 columns. +Using method j with WG size 256 +OK! +Completed in 0.0036710130000000003 s (achieved 7.3004383258789876 GB/s) +Achieved 1.7433879967191617 GFLOPs +Completed in 0.0036736940000000004 s (achieved 7.295110588960322 GB/s) +Achieved 1.7421157015254944 GFLOPs +Completed in 0.003673721 s (achieved 7.295056973569849 GB/s) +Achieved 1.742102897852069 GFLOPs +Completed in 0.003672256 s (achieved 7.297967244113701 GB/s) +Achieved 1.74279788772896 GFLOPs +Completed in 0.0036706630000000002 s (achieved 7.301134427213831 GB/s) +Achieved 1.7435542298489402 GFLOPs +Completed in 0.003668324 s (achieved 7.305789783018076 GB/s) +Achieved 1.7446659564422335 GFLOPs +Completed in 0.0036735210000000003 s (achieved 7.295454143313731 GB/s) +Achieved 1.742197744343914 GFLOPs +Completed in 0.003668436 s (achieved 7.305566731980605 GB/s) +Achieved 1.7446126905307875 GFLOPs +Completed in 0.0036680130000000003 s (achieved 7.306409219378448 GB/s) +Achieved 1.7448138815211396 GFLOPs +Completed in 0.003667533 s (achieved 7.307365468831501 GB/s) +Achieved 1.7450422395653973 GFLOPs +Durations: [0.0036710130000000003, 0.0036736940000000004, 0.003673721, 0.003672256, 0.0036706630000000002, 0.003668324, 0.0036735210000000003, 0.003668436, 0.0036680130000000003, 0.003667533] +Median duration 0.0036710130000000003 (7.3004383258789876 GB/s) 1.6009733170787253% of peak. +Median achieved 1.7433879967191617 GFLOPs +./sycl_spmm 100000 100000 8 32 j 256 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 8 columns. +Using method j with WG size 256 +OK! +Completed in 0.0019402240000000002 s (achieved 16.69910484562607 GB/s) +Achieved 26.388705633988653 GFLOPs +Completed in 0.0019427670000000002 s (achieved 16.677246422242092 GB/s) +Achieved 26.35416393216479 GFLOPs +Completed in 0.0019414150000000002 s (achieved 16.688860444572644 GB/s) +Achieved 26.372516952841096 GFLOPs +Completed in 0.001939453 s (achieved 16.70574332041045 GB/s) +Achieved 26.399196061982426 GFLOPs +Completed in 0.001939942 s (achieved 16.701532313852685 GB/s) +Achieved 26.392541632687987 GFLOPs +Completed in 0.0019404650000000002 s (achieved 16.69703086631297 GB/s) +Achieved 26.38542823498491 GFLOPs +Completed in 0.0019414170000000002 s (achieved 16.688843252119458 GB/s) +Achieved 26.372489784523363 GFLOPs +Completed in 0.0019397560000000002 s (achieved 16.70313379620942 GB/s) +Achieved 26.39507236992694 GFLOPs +Completed in 0.0019402660000000002 s (achieved 16.698743368177354 GB/s) +Achieved 26.388134410436507 GFLOPs +Completed in 0.001940382 s (achieved 16.697745083184653 GB/s) +Achieved 26.386556873852676 GFLOPs +Durations: [0.0019402240000000002, 0.0019427670000000002, 0.0019414150000000002, 0.001939453, 0.001939942, 0.0019404650000000002, 0.0019414170000000002, 0.0019397560000000002, 0.0019402660000000002, 0.001940382] +Median duration 0.001940382 (16.697745083184653 GB/s) 3.6617862024527748% of peak. +Median achieved 26.386556873852676 GFLOPs +./sycl_spmm 100000 100000 32 32 j 256 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 32 columns. +Using method j with WG size 256 +OK! +Completed in 0.002013771 s (achieved 25.62357090255049 GB/s) +Achieved 101.69974639618904 GFLOPs +Completed in 0.002014488 s (achieved 25.614450917553246 GB/s) +Achieved 101.66354924923851 GFLOPs +Completed in 0.0020142800000000002 s (achieved 25.61709593502393 GB/s) +Achieved 101.67404730226184 GFLOPs +Completed in 0.002015171 s (achieved 25.60576943594365 GB/s) +Achieved 101.62909251869941 GFLOPs +Completed in 0.002015003 s (achieved 25.607904305849672 GB/s) +Achieved 101.63756580015017 GFLOPs +Completed in 0.0020133580000000002 s (achieved 25.62882706403928 GB/s) +Achieved 101.72060805877543 GFLOPs +Completed in 0.002013636 s (achieved 25.625288781090525 GB/s) +Achieved 101.70656464226902 GFLOPs +Completed in 0.002013813 s (achieved 25.623036498423637 GB/s) +Achieved 101.69762535051666 GFLOPs +Completed in 0.002013383 s (achieved 25.628508833143023 GB/s) +Achieved 101.719345002913 GFLOPs +Completed in 0.0020154590000000003 s (achieved 25.602110486990803 GB/s) +Achieved 101.61457017979527 GFLOPs +Durations: [0.002013771, 0.002014488, 0.0020142800000000002, 0.002015171, 0.002015003, 0.0020133580000000002, 0.002013636, 0.002013813, 0.002013383, 0.0020154590000000003] +Median duration 0.0020142800000000002 (25.61709593502393 GB/s) 5.617784196277177% of peak. +Median achieved 101.67404730226184 GFLOPs +./sycl_spmm 100000 100000 64 32 j 256 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 64 columns. +Using method j with WG size 256 +OK! +Completed in 0.002481679 s (achieved 31.10797327132155 GB/s) +Achieved 165.04954911573978 GFLOPs +Completed in 0.0024814710000000003 s (achieved 31.110580780512848 GB/s) +Achieved 165.06338377518819 GFLOPs +Completed in 0.002482344 s (achieved 31.099639695384685 GB/s) +Achieved 165.0053336685004 GFLOPs +Completed in 0.002482932 s (achieved 31.092274778366864 GB/s) +Achieved 164.96625763411964 GFLOPs +Completed in 0.002480726 s (achieved 31.119923764252885 GB/s) +Achieved 165.11295483660834 GFLOPs +Completed in 0.0024831460000000003 s (achieved 31.08959521510213 GB/s) +Achieved 164.95204067743094 GFLOPs +Completed in 0.002482768 s (achieved 31.094328588091997 GB/s) +Achieved 164.97715453074954 GFLOPs +Completed in 0.002483602 s (achieved 31.083887031819106 GB/s) +Achieved 164.92175477391305 GFLOPs +Completed in 0.0024834320000000003 s (achieved 31.086014837531284 GB/s) +Achieved 164.93304427099272 GFLOPs +Completed in 0.0024824960000000003 s (achieved 31.097735504911185 GB/s) +Achieved 164.99523060661528 GFLOPs +Durations: [0.002481679, 0.0024814710000000003, 0.002482344, 0.002482932, 0.002480726, 0.0024831460000000003, 0.002482768, 0.002483602, 0.0024834320000000003, 0.0024824960000000003] +Median duration 0.002482768 (31.094328588091997 GB/s) 6.818931707914912% of peak. +Median achieved 164.97715453074954 GFLOPs +./sycl_spmm 100000 100000 128 32 j 256 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 128 columns. +Using method j with WG size 256 +OK! +Completed in 0.003501788 s (achieved 36.666983837970776 GB/s) +Achieved 233.93763414575642 GFLOPs +Completed in 0.003501333 s (achieved 36.67174873112612 GB/s) +Achieved 233.96803446001852 GFLOPs +Completed in 0.0035015560000000003 s (achieved 36.669413255135716 GB/s) +Achieved 233.9531339781514 GFLOPs +Completed in 0.0034999180000000003 s (achieved 36.68657494261294 GB/s) +Achieved 234.06262661010913 GFLOPs +Completed in 0.0035009230000000004 s (achieved 36.67604343197494 GB/s) +Achieved 233.995434918163 GFLOPs +Completed in 0.003498717 s (achieved 36.69916829512076 GB/s) +Achieved 234.14297298123856 GFLOPs +Completed in 0.003500744 s (achieved 36.677918750985505 GB/s) +Achieved 234.00739956992 GFLOPs +Completed in 0.003500538 s (achieved 36.68007717670827 GB/s) +Achieved 234.02117046008357 GFLOPs +Completed in 0.0035014160000000003 s (achieved 36.670879438490026 GB/s) +Achieved 233.9624883190115 GFLOPs +Completed in 0.003501091 s (achieved 36.67428353047664 GB/s) +Achieved 233.9842066373025 GFLOPs +Durations: [0.003501788, 0.003501333, 0.0035015560000000003, 0.0034999180000000003, 0.0035009230000000004, 0.003498717, 0.003500744, 0.003500538, 0.0035014160000000003, 0.003501091] +Median duration 0.003501091 (36.67428353047664 GB/s) 8.042606037385228% of peak. +Median achieved 233.9842066373025 GFLOPs +./sycl_spmm 100000 100000 256 32 j 256 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 256 columns. +Using method j with WG size 256 +OK! +Completed in 0.005904662000000001 s (achieved 39.08775879127374 GB/s) +Achieved 277.47566245112756 GFLOPs +Completed in 0.005908916 s (achieved 39.059618380088665 GB/s) +Achieved 277.27589967432266 GFLOPs +Completed in 0.0059119080000000004 s (achieved 39.03985041715804 GB/s) +Achieved 277.13557112187806 GFLOPs +Completed in 0.005912004 s (achieved 39.03921648226219 GB/s) +Achieved 277.1310709532673 GFLOPs +Completed in 0.005907466 s (achieved 39.06920564587253 GB/s) +Achieved 277.34395762921025 GFLOPs +Completed in 0.005906686 s (achieved 39.074364880747005 GB/s) +Achieved 277.38058193714716 GFLOPs +Completed in 0.005907405 s (achieved 39.06960907538928 GB/s) +Achieved 277.3468214893003 GFLOPs +Completed in 0.005906962 s (achieved 39.072539149566225 GB/s) +Achieved 277.3676214609134 GFLOPs +Completed in 0.005909409 s (achieved 39.05635978149422 GB/s) +Achieved 277.2527675779422 GFLOPs +Completed in 0.005909075000000001 s (achieved 39.058567373066 GB/s) +Achieved 277.2684387996429 GFLOPs +Durations: [0.005904662000000001, 0.005908916, 0.0059119080000000004, 0.005912004, 0.005907466, 0.005906686, 0.005907405, 0.005906962, 0.005909409, 0.005909075000000001] +Median duration 0.005908916 (39.059618380088665 GB/s) 8.565705785107163% of peak. +Median achieved 277.27589967432266 GFLOPs +./sycl_spmm 100000 100000 512 32 j 256 +Multiplying 100000 x 100000 matrix with 32 nnz/row by 512 columns. +Using method j with WG size 256 +OK! +Completed in 0.011050158000000001 s (achieved 39.42025118554866 GB/s) +Achieved 296.5387463238082 GFLOPs +Completed in 0.011050190000000001 s (achieved 39.42013702931805 GB/s) +Achieved 296.53788758383337 GFLOPs +Completed in 0.011049748 s (achieved 39.421713870759774 GB/s) +Achieved 296.5497493698499 GFLOPs +Completed in 0.01104891 s (achieved 39.424703794310936 GB/s) +Achieved 296.5722410626931 GFLOPs +Completed in 0.011047568 s (achieved 39.429492898346496 GB/s) +Achieved 296.6082670864755 GFLOPs +Completed in 0.011048101000000001 s (achieved 39.42759067825321 GB/s) +Achieved 296.59395764032206 GFLOPs +Completed in 0.011049827 s (achieved 39.42143202784985 GB/s) +Achieved 296.5476292072265 GFLOPs +Completed in 0.011059045 s (achieved 39.388573244796454 GB/s) +Achieved 296.30044908941056 GFLOPs +Completed in 0.011049023 s (achieved 39.4243005920071 GB/s) +Achieved 296.56920797431593 GFLOPs +Completed in 0.011053239000000001 s (achieved 39.409263112830544 GB/s) +Achieved 296.4560885727704 GFLOPs +Durations: [0.011050158000000001, 0.011050190000000001, 0.011049748, 0.01104891, 0.011047568, 0.011048101000000001, 0.011049827, 0.011059045, 0.011049023, 0.011053239000000001] +Median duration 0.011049827 (39.42143202784985 GB/s) 8.645050883300406% of peak. +Median achieved 296.5476292072265 GFLOPs +./sycl_spmm 100000 100000 1 64 j 256 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 1 columns. +Using method j with WG size 256 +OK! +Completed in 0.007405753 s (achieved 7.075580835601727 GB/s) +Achieved 1.7283860263770612 GFLOPs +Completed in 0.0074139520000000006 s (achieved 7.067756036186908 GB/s) +Achieved 1.726474625139197 GFLOPs +Completed in 0.007407294 s (achieved 7.074108844606411 GB/s) +Achieved 1.7280264560850427 GFLOPs +Completed in 0.007409609 s (achieved 7.071898665638092 GB/s) +Achieved 1.7274865650805595 GFLOPs +Completed in 0.007423740000000001 s (achieved 7.058437391395711 GB/s) +Achieved 1.724198315134959 GFLOPs +Completed in 0.007412859000000001 s (achieved 7.068798151968086 GB/s) +Achieved 1.726729187753335 GFLOPs +Completed in 0.007410746 s (achieved 7.070813653578195 GB/s) +Achieved 1.727221523986924 GFLOPs +Completed in 0.007420997 s (achieved 7.061046379617186 GB/s) +Achieved 1.7248356251862116 GFLOPs +Completed in 0.007420238000000001 s (achieved 7.061768638687869 GB/s) +Achieved 1.725012054869399 GFLOPs +Completed in 0.007412318 s (achieved 7.069314079617199 GB/s) +Achieved 1.7268552158717423 GFLOPs +Durations: [0.007405753, 0.0074139520000000006, 0.007407294, 0.007409609, 0.007423740000000001, 0.007412859000000001, 0.007410746, 0.007420997, 0.007420238000000001, 0.007412318] +Median duration 0.007412859000000001 (7.068798151968086 GB/s) 1.5501750333263347% of peak. +Median achieved 1.726729187753335 GFLOPs +./sycl_spmm 100000 100000 8 64 j 256 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 8 columns. +Using method j with WG size 256 +OK! +Completed in 0.0037458710000000004 s (achieved 15.48371633726842 GB/s) +Achieved 27.336766268779677 GFLOPs +Completed in 0.0037463870000000003 s (achieved 15.481583723197842 GB/s) +Achieved 27.33300110212853 GFLOPs +Completed in 0.0037440000000000004 s (achieved 15.491454059829058 GB/s) +Achieved 27.35042735042735 GFLOPs +Completed in 0.0037444080000000003 s (achieved 15.489766072500645 GB/s) +Achieved 27.347447179901334 GFLOPs +Completed in 0.003744671 s (achieved 15.488678177602251 GB/s) +Achieved 27.345526482833872 GFLOPs +Completed in 0.003745935 s (achieved 15.4834517950792 GB/s) +Achieved 27.336299215015742 GFLOPs +Completed in 0.003745725 s (achieved 15.484319857971421 GB/s) +Achieved 27.33783179491287 GFLOPs +Completed in 0.0037454180000000004 s (achieved 15.48558905841751 GB/s) +Achieved 27.34007259002867 GFLOPs +Completed in 0.003744189 s (achieved 15.49067207878662 GB/s) +Achieved 27.349046749509707 GFLOPs +Completed in 0.0037456100000000003 s (achieved 15.484795266992558 GB/s) +Achieved 27.338671137678507 GFLOPs +Durations: [0.0037458710000000004, 0.0037463870000000003, 0.0037440000000000004, 0.0037444080000000003, 0.003744671, 0.003745935, 0.003745725, 0.0037454180000000004, 0.003744189, 0.0037456100000000003] +Median duration 0.0037456100000000003 (15.484795266992558 GB/s) 3.395788435743982% of peak. +Median achieved 27.338671137678507 GFLOPs +./sycl_spmm 100000 100000 32 64 j 256 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 32 columns. +Using method j with WG size 256 +OK! +Completed in 0.003901177 s (achieved 19.78890063178369 GB/s) +Achieved 104.99395438863708 GFLOPs +Completed in 0.0038991510000000004 s (achieved 19.799182950339702 GB/s) +Achieved 105.04850927804539 GFLOPs +Completed in 0.0038991810000000003 s (achieved 19.799030616942378 GB/s) +Achieved 105.04770104286003 GFLOPs +Completed in 0.003903128 s (achieved 19.77900904095382 GB/s) +Achieved 104.94147258301547 GFLOPs +Completed in 0.003900399 s (achieved 19.792847859924073 GB/s) +Achieved 105.0148971938512 GFLOPs +Completed in 0.0039008130000000004 s (achieved 19.79074721090193 GB/s) +Achieved 105.00375178200031 GFLOPs +Completed in 0.0039001440000000004 s (achieved 19.79414195988661 GB/s) +Achieved 105.02176329899613 GFLOPs +Completed in 0.003899276 s (achieved 19.79854824331491 GB/s) +Achieved 105.04514171348733 GFLOPs +Completed in 0.0038976820000000004 s (achieved 19.80664507776673 GB/s) +Achieved 105.08810108161722 GFLOPs +Completed in 0.003898517 s (achieved 19.802402811120228 GB/s) +Achieved 105.06559289083515 GFLOPs +Durations: [0.003901177, 0.0038991510000000004, 0.0038991810000000003, 0.003903128, 0.003900399, 0.0039008130000000004, 0.0039001440000000004, 0.003899276, 0.0038976820000000004, 0.003898517] +Median duration 0.0039001440000000004 (19.79414195988661 GB/s) 4.340820605238291% of peak. +Median achieved 105.02176329899613 GFLOPs +./sycl_spmm 100000 100000 64 64 j 256 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 64 columns. +Using method j with WG size 256 +OK! +Completed in 0.004821912 s (achieved 21.319344691483376 GB/s) +Achieved 169.8911137324779 GFLOPs +Completed in 0.0048187180000000005 s (achieved 21.333475833198786 GB/s) +Achieved 170.00372298192173 GFLOPs +Completed in 0.004820007 s (achieved 21.327770685810208 GB/s) +Achieved 169.95825939671872 GFLOPs +Completed in 0.004824785000000001 s (achieved 21.30664972636086 GB/s) +Achieved 169.78994918944574 GFLOPs +Completed in 0.004819437 s (achieved 21.330293144199207 GB/s) +Achieved 169.97836054294308 GFLOPs +Completed in 0.004820042 s (achieved 21.32761581745553 GB/s) +Achieved 169.95702527073416 GFLOPs +Completed in 0.0048195880000000005 s (achieved 21.329624855900544 GB/s) +Achieved 169.9730350395096 GFLOPs +Completed in 0.004819653 s (achieved 21.32933719502213 GB/s) +Achieved 169.9707427069957 GFLOPs +Completed in 0.004819929000000001 s (achieved 21.328115829092084 GB/s) +Achieved 169.961009799107 GFLOPs +Completed in 0.0048188020000000005 s (achieved 21.333103954053307 GB/s) +Achieved 170.00075952487774 GFLOPs +Durations: [0.004821912, 0.0048187180000000005, 0.004820007, 0.004824785000000001, 0.004819437, 0.004820042, 0.0048195880000000005, 0.004819653, 0.004819929000000001, 0.0048188020000000005] +Median duration 0.004819929000000001 (21.328115829092084 GB/s) 4.677218383572825% of peak. +Median achieved 169.961009799107 GFLOPs +./sycl_spmm 100000 100000 128 64 j 256 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 128 columns. +Using method j with WG size 256 +OK! +Completed in 0.006861982000000001 s (achieved 22.442496060176197 GB/s) +Achieved 238.76483499956717 GFLOPs +Completed in 0.006863346 s (achieved 22.438035908432997 GB/s) +Achieved 238.71738362017592 GFLOPs +Completed in 0.006861989000000001 s (achieved 22.4424731663079 GB/s) +Achieved 238.76459143260067 GFLOPs +Completed in 0.006862956000000001 s (achieved 22.439310990774235 GB/s) +Achieved 238.73094917117345 GFLOPs +Completed in 0.006860969000000001 s (achieved 22.445809622518333 GB/s) +Achieved 238.80008785930966 GFLOPs +Completed in 0.0068600530000000005 s (achieved 22.448806736624334 GB/s) +Achieved 238.83197403868454 GFLOPs +Completed in 0.006862306 s (achieved 22.441436450079607 GB/s) +Achieved 238.75356184932588 GFLOPs +Completed in 0.006861851 s (achieved 22.4429245111851 GB/s) +Achieved 238.76939327303958 GFLOPs +Completed in 0.006862798000000001 s (achieved 22.43982760384321 GB/s) +Achieved 238.73644539734374 GFLOPs +Completed in 0.006861455000000001 s (achieved 22.44421977554323 GB/s) +Achieved 238.78317353972298 GFLOPs +Durations: [0.006861982000000001, 0.006863346, 0.006861989000000001, 0.006862956000000001, 0.006860969000000001, 0.0068600530000000005, 0.006862306, 0.006861851, 0.006862798000000001, 0.006861455000000001] +Median duration 0.006861989000000001 (22.4424731663079 GB/s) 4.921594992611381% of peak. +Median achieved 238.76459143260067 GFLOPs +./sycl_spmm 100000 100000 256 64 j 256 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 256 columns. +Using method j with WG size 256 +OK! +Completed in 0.011424863 s (achieved 22.442282590171978 GB/s) +Achieved 286.81306725516095 GFLOPs +Completed in 0.011434098 s (achieved 22.42415658847773 GB/s) +Achieved 286.58141639156844 GFLOPs +Completed in 0.011426445 s (achieved 22.439175439080135 GB/s) +Achieved 286.7733577678797 GFLOPs +Completed in 0.011428830000000001 s (achieved 22.434492769601086 GB/s) +Achieved 286.71351310676596 GFLOPs +Completed in 0.011428345000000001 s (achieved 22.435444852251134 GB/s) +Achieved 286.7256807525499 GFLOPs +Completed in 0.011430309000000001 s (achieved 22.43158990714949 GB/s) +Achieved 286.6764144346404 GFLOPs +Completed in 0.011424128 s (achieved 22.443726470851868 GB/s) +Achieved 286.83152009501293 GFLOPs +Completed in 0.011416644 s (achieved 22.458439099966682 GB/s) +Achieved 287.0195479512193 GFLOPs +Completed in 0.011419776000000001 s (achieved 22.452279624398937 GB/s) +Achieved 286.94082966250824 GFLOPs +Completed in 0.011422299 s (achieved 22.447320281144805 GB/s) +Achieved 286.87744910197154 GFLOPs +Durations: [0.011424863, 0.011434098, 0.011426445, 0.011428830000000001, 0.011428345000000001, 0.011430309000000001, 0.011424128, 0.011416644, 0.011419776000000001, 0.011422299] +Median duration 0.011426445 (22.439175439080135 GB/s) 4.920871806815819% of peak. +Median achieved 286.7733577678797 GFLOPs +./sycl_spmm 100000 100000 512 64 j 256 +Multiplying 100000 x 100000 matrix with 64 nnz/row by 512 columns. +Using method j with WG size 256 +OK! +Completed in 0.021668405 s (achieved 21.284446363264856 GB/s) +Achieved 302.4495803913578 GFLOPs +Completed in 0.021672538 s (achieved 21.280387373181675 GB/s) +Achieved 302.39190260042454 GFLOPs +Completed in 0.02166529 s (achieved 21.28750660618898 GB/s) +Achieved 302.4930660978921 GFLOPs +Completed in 0.021669872 s (achieved 21.283005455685206 GB/s) +Achieved 302.42910525728996 GFLOPs +Completed in 0.021662582 s (achieved 21.29016771869577 GB/s) +Achieved 302.5308802062469 GFLOPs +Completed in 0.021675172000000003 s (achieved 21.27780134801237 GB/s) +Achieved 302.3551554746601 GFLOPs +Completed in 0.021667223000000003 s (achieved 21.285607481863273 GB/s) +Achieved 302.4660797555829 GFLOPs +Completed in 0.021664022 s (achieved 21.28875256865969 GB/s) +Achieved 302.51077108396584 GFLOPs +Completed in 0.021667306 s (achieved 21.285525944019067 GB/s) +Achieved 302.46492111202014 GFLOPs +Completed in 0.021666340000000003 s (achieved 21.286474965314863 GB/s) +Achieved 302.47840659751483 GFLOPs +Durations: [0.021668405, 0.021672538, 0.02166529, 0.021669872, 0.021662582, 0.021675172000000003, 0.021667223000000003, 0.021664022, 0.021667306, 0.021666340000000003] +Median duration 0.021667306 (21.285525944019067 GB/s) 4.667878496495409% of peak. +Median achieved 302.46492111202014 GFLOPs +./sycl_spmm 100000 100000 1 128 j 256 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 1 columns. +Using method j with WG size 256 +OK! +Completed in 0.01448226 s (achieved 7.153579897060266 GB/s) +Achieved 1.7676799063129651 GFLOPs +Completed in 0.014466195000000001 s (achieved 7.161524091165645 GB/s) +Achieved 1.7696429503404316 GFLOPs +Completed in 0.014459034 s (achieved 7.1650709169091105 GB/s) +Achieved 1.7705193860115414 GFLOPs +Completed in 0.014477930000000002 s (achieved 7.155719360433432 GB/s) +Achieved 1.7682085767785862 GFLOPs +Completed in 0.014480253 s (achieved 7.154571401480347 GB/s) +Achieved 1.7679249112567301 GFLOPs +Completed in 0.014459036000000001 s (achieved 7.1650699258235475 GB/s) +Achieved 1.770519141110099 GFLOPs +Completed in 0.014490062000000001 s (achieved 7.14972813780921 GB/s) +Achieved 1.7667281202799545 GFLOPs +Completed in 0.014482990000000001 s (achieved 7.153219328329302 GB/s) +Achieved 1.767590808251611 GFLOPs +Completed in 0.014477603 s (achieved 7.155880983889392 GB/s) +Achieved 1.7682485146194435 GFLOPs +Completed in 0.014461478000000002 s (achieved 7.163860014861552 GB/s) +Achieved 1.770220166984315 GFLOPs +Durations: [0.01448226, 0.014466195000000001, 0.014459034, 0.014477930000000002, 0.014480253, 0.014459036000000001, 0.014490062000000001, 0.014482990000000001, 0.014477603, 0.014461478000000002] +Median duration 0.014477930000000002 (7.155719360433432 GB/s) 1.5692367018494366% of peak. +Median achieved 1.7682085767785862 GFLOPs +./sycl_spmm 100000 100000 8 128 j 256 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 8 columns. +Using method j with WG size 256 +OK! +Completed in 0.007377376000000001 s (achieved 14.802011446888432 GB/s) +Achieved 27.760547923814645 GFLOPs +Completed in 0.00737662 s (achieved 14.803528445277104 GB/s) +Achieved 27.763392990285524 GFLOPs +Completed in 0.007379640000000001 s (achieved 14.797470337306425 GB/s) +Achieved 27.75203126439772 GFLOPs +Completed in 0.007378189 s (achieved 14.800380418555285 GB/s) +Achieved 27.757488998994198 GFLOPs +Completed in 0.007376865000000001 s (achieved 14.803036791374113 GB/s) +Achieved 27.762470914134934 GFLOPs +Completed in 0.007377409000000001 s (achieved 14.80194523578671 GB/s) +Achieved 27.760423747687025 GFLOPs +Completed in 0.007377016 s (achieved 14.802733788295972 GB/s) +Achieved 27.76190264464656 GFLOPs +Completed in 0.0073764650000000005 s (achieved 14.803839508490855 GB/s) +Achieved 27.763976376218146 GFLOPs +Completed in 0.007378611 s (achieved 14.799533950224507 GB/s) +Achieved 27.755901483355064 GFLOPs +Completed in 0.007378302000000001 s (achieved 14.800153748111692 GB/s) +Achieved 27.7570638881412 GFLOPs +Durations: [0.007377376000000001, 0.00737662, 0.007379640000000001, 0.007378189, 0.007376865000000001, 0.007377409000000001, 0.007377016, 0.0073764650000000005, 0.007378611, 0.007378302000000001] +Median duration 0.007377409000000001 (14.80194523578671 GB/s) 3.24604062188305% of peak. +Median achieved 27.760423747687025 GFLOPs +./sycl_spmm 100000 100000 32 128 j 256 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 32 columns. +Using method j with WG size 256 +OK! +Completed in 0.007695293000000001 s (achieved 16.685525034589325 GB/s) +Achieved 106.4546860009099 GFLOPs +Completed in 0.007696518 s (achieved 16.68286931830732 GB/s) +Achieved 106.43774236609335 GFLOPs +Completed in 0.007695561 s (achieved 16.684943956652415 GB/s) +Achieved 106.45097868758367 GFLOPs +Completed in 0.007694142 s (achieved 16.68802109448981 GB/s) +Achieved 106.47061101809663 GFLOPs +Completed in 0.007695321000000001 s (achieved 16.685464323060728 GB/s) +Achieved 106.45429865758686 GFLOPs +Completed in 0.007695132 s (achieved 16.685874134452796 GB/s) +Achieved 106.45691327972021 GFLOPs +Completed in 0.0076933 s (achieved 16.689847529668675 GB/s) +Achieved 106.48226378797135 GFLOPs +Completed in 0.007695595 s (achieved 16.684870240702637 GB/s) +Achieved 106.45050837524583 GFLOPs +Completed in 0.007695973 s (achieved 16.68405073666449 GB/s) +Achieved 106.44527988858589 GFLOPs +Completed in 0.007694422 s (achieved 16.68741381743814 GB/s) +Achieved 106.4667365527911 GFLOPs +Durations: [0.007695293000000001, 0.007696518, 0.007695561, 0.007694142, 0.007695321000000001, 0.007695132, 0.0076933, 0.007695595, 0.007695973, 0.007694422] +Median duration 0.007695321000000001 (16.685464323060728 GB/s) 3.659093053302791% of peak. +Median achieved 106.45429865758686 GFLOPs +./sycl_spmm 100000 100000 64 128 j 256 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 64 columns. +Using method j with WG size 256 +OK! +Completed in 0.009527233000000001 s (achieved 16.16418995945622 GB/s) +Achieved 171.97018273826197 GFLOPs +Completed in 0.009525405 s (achieved 16.167291994408636 GB/s) +Achieved 172.003185166405 GFLOPs +Completed in 0.009526485000000001 s (achieved 16.165459138391544 GB/s) +Achieved 171.98368548315563 GFLOPs +Completed in 0.009524527000000001 s (achieved 16.168782344782052 GB/s) +Achieved 172.01904094555036 GFLOPs +Completed in 0.009528968 s (achieved 16.161246842260358 GB/s) +Achieved 171.93887102989535 GFLOPs +Completed in 0.009525639 s (achieved 16.166894840335644 GB/s) +Achieved 171.99895985980572 GFLOPs +Completed in 0.009526261000000001 s (achieved 16.165839252147297 GB/s) +Achieved 171.98772949848842 GFLOPs +Completed in 0.00952711 s (achieved 16.164398647648657 GB/s) +Achieved 171.97240296375293 GFLOPs +Completed in 0.009525619 s (achieved 16.166928784365613 GB/s) +Achieved 171.99932098900868 GFLOPs +Completed in 0.009527238 s (achieved 16.164181476310343 GB/s) +Achieved 171.97009248640583 GFLOPs +Durations: [0.009527233000000001, 0.009525405, 0.009526485000000001, 0.009524527000000001, 0.009528968, 0.009525639, 0.009526261000000001, 0.00952711, 0.009525619, 0.009527238] +Median duration 0.009526485000000001 (16.165459138391544 GB/s) 3.5450568285946367% of peak. +Median achieved 171.98368548315563 GFLOPs +./sycl_spmm 100000 100000 128 128 j 256 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 128 columns. +Using method j with WG size 256 +OK! +Completed in 0.013599781 s (achieved 15.088478557117943 GB/s) +Achieved 240.94505639465814 GFLOPs +Completed in 0.013599724 s (achieved 15.088541796877644 GB/s) +Achieved 240.94606625840348 GFLOPs +Completed in 0.013598544 s (achieved 15.089851089940218 GB/s) +Achieved 240.96697411134605 GFLOPs +Completed in 0.01359948 s (achieved 15.088812513419631 GB/s) +Achieved 240.9503892795901 GFLOPs +Completed in 0.013602385000000002 s (achieved 15.08559006380131 GB/s) +Achieved 240.8989305919513 GFLOPs +Completed in 0.013599535000000001 s (achieved 15.088751490400224 GB/s) +Achieved 240.94941481455064 GFLOPs +Completed in 0.013596697000000001 s (achieved 15.091900922702036 GB/s) +Achieved 240.99970750249122 GFLOPs +Completed in 0.013599379 s (achieved 15.088924575158911 GB/s) +Achieved 240.95217877228072 GFLOPs +Completed in 0.013594891000000001 s (achieved 15.093905791521241 GB/s) +Achieved 241.03172287295277 GFLOPs +Completed in 0.01359577 s (achieved 15.092929933354272 GB/s) +Achieved 241.01613957870723 GFLOPs +Durations: [0.013599781, 0.013599724, 0.013598544, 0.01359948, 0.013602385000000002, 0.013599535000000001, 0.013596697000000001, 0.013599379, 0.013594891000000001, 0.01359577] +Median duration 0.01359948 (15.088812513419631 GB/s) 3.308950112592024% of peak. +Median achieved 240.9503892795901 GFLOPs +./sycl_spmm 100000 100000 256 128 j 256 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 256 columns. +Using method j with WG size 256 +OK! +Completed in 0.022503916000000002 s (achieved 13.668732321965653 GB/s) +Achieved 291.22042581388945 GFLOPs +Completed in 0.022477880000000002 s (achieved 13.684564736532094 GB/s) +Achieved 291.5577447695245 GFLOPs +Completed in 0.02246426 s (achieved 13.69286163888773 GB/s) +Achieved 291.7345151810031 GFLOPs +Completed in 0.022477538000000002 s (achieved 13.684772949777685 GB/s) +Achieved 291.56218087585927 GFLOPs +Completed in 0.02248059 s (achieved 13.682915083634372 GB/s) +Achieved 291.5225979389331 GFLOPs +Completed in 0.022459989000000003 s (achieved 13.695465478634027 GB/s) +Achieved 291.78999152670997 GFLOPs +Completed in 0.022479325 s (achieved 13.683685075063421 GB/s) +Achieved 291.53900306170226 GFLOPs +Completed in 0.02247141 s (achieved 13.688504815674674 GB/s) +Achieved 291.64169048582175 GFLOPs +Completed in 0.0225084 s (achieved 13.666009312079048 GB/s) +Achieved 291.16241047786605 GFLOPs +Completed in 0.022471626 s (achieved 13.688373240102875 GB/s) +Achieved 291.63888719045076 GFLOPs +Durations: [0.022503916000000002, 0.022477880000000002, 0.02246426, 0.022477538000000002, 0.02248059, 0.022459989000000003, 0.022479325, 0.02247141, 0.0225084, 0.022471626] +Median duration 0.022477880000000002 (13.684564736532094 GB/s) 3.0010010387131785% of peak. +Median achieved 291.5577447695245 GFLOPs +./sycl_spmm 100000 100000 512 128 j 256 +Multiplying 100000 x 100000 matrix with 128 nnz/row by 512 columns. +Using method j with WG size 256 +OK! +Completed in 0.04296925 s (achieved 11.924806786248308 GB/s) +Achieved 305.03674139064566 GFLOPs +Completed in 0.042968795000000004 s (achieved 11.924933058979198 GB/s) +Achieved 305.0399714490481 GFLOPs +Completed in 0.042963792 s (achieved 11.926321680358198 GB/s) +Achieved 305.07549240532586 GFLOPs +Completed in 0.042951039 s (achieved 11.929862837543931 GB/s) +Achieved 305.1660752607172 GFLOPs +Completed in 0.042976165000000004 s (achieved 11.922888047363 GB/s) +Achieved 304.9876600203857 GFLOPs +Completed in 0.042974601 s (achieved 11.923321964059657 GB/s) +Achieved 304.9987596161742 GFLOPs +Completed in 0.042967283 s (achieved 11.925352692186753 GB/s) +Achieved 305.0507056729652 GFLOPs +Completed in 0.042936692000000005 s (achieved 11.933849119070468 GB/s) +Achieved 305.268044403607 GFLOPs +Completed in 0.042961835000000004 s (achieved 11.926864948855188 GB/s) +Achieved 305.08938922185234 GFLOPs +Completed in 0.042952724000000005 s (achieved 11.929394838846543 GB/s) +Achieved 305.1541038468247 GFLOPs +Durations: [0.04296925, 0.042968795000000004, 0.042963792, 0.042951039, 0.042976165000000004, 0.042974601, 0.042967283, 0.042936692000000005, 0.042961835000000004, 0.042952724000000005] +Median duration 0.042967283 (11.925352692186753 GB/s) 2.615208923725165% of peak. +Median achieved 305.0507056729652 GFLOPs +Finished SPMM benchmark tests at Fri Sep 19 09:26:06 PM PDT 2025