-
Notifications
You must be signed in to change notification settings - Fork 31
Closed as duplicate of #445
Closed as duplicate of #445
Copy link
Description
Consider the following matmul example, which follows the KernelAbstractions documentation:
using KernelAbstractions
using Random
# using CUDA, CUDA.CUDAKernels
using oneAPI
# Matrix-multiplication kernel: the invocation at global index (row, col)
# writes output[row, col] as the dot product of row `row` of `a` with
# column `col` of `b`.
@kernel function matmul_kernel!(output, a, b)
    row, col = @index(Global, NTuple)

    # Length of the contracted dimension (number of columns of `a`).
    inner = size(a)[2]

    # Accumulate in the element type of `output`.
    acc = zero(eltype(output))
    for k in 1:inner
        acc += a[row, k] * b[k, col]
    end

    output[row, col] = acc
end
"""
    matmul!(output, a, b)

Launch `matmul_kernel!` on the backend of `a` to store the product `a * b`
in `output`, after checking that the argument sizes are compatible.

The kernel is launched over `size(output)`, so `output` must not have more
rows than `a` or more columns than `b`; otherwise the kernel would index
`a` or `b` out of bounds.

# Returns
`nothing` in every case. When a size check fails, a message is printed and
no kernel is launched. The launch is not synchronized here; the caller is
expected to synchronize the backend before reading `output`.
"""
function matmul!(output, a, b)
    # Inner dimensions must agree for the product to be defined.
    if size(a, 2) != size(b, 1)
        println("Matrix size mismatch!")
        return nothing
    end

    # ndrange is size(output): every (i, j) in that range reads a[i, :] and
    # b[:, j], so an oversized output would cause out-of-bounds accesses.
    if size(output, 1) > size(a, 1) || size(output, 2) > size(b, 2)
        println("Output size mismatch!")
        return nothing
    end

    backend = KernelAbstractions.get_backend(a)
    kernel! = matmul_kernel!(backend)
    kernel!(output, a, b, ndrange = size(output))
    return nothing
end
# Reproducer driver: select the backend under test (alternatives are left
# commented out for comparison).
backend = oneAPIBackend()
# backend = CPU()
# backend = CUDABackend()
# Allocate Float32 inputs on the chosen backend and fill them in place with
# normally distributed random values; the inner dimension of `b` is taken
# from `a` so the product is well defined.
a = randn!(allocate(backend, Float32, 256, 123));
b = randn!(allocate(backend, Float32, size(a, 2), 45));
# Zero-initialized result buffer of size size(a, 1) × size(b, 2).
output = KernelAbstractions.zeros(backend, Float32, size(a, 1), size(b, 2));
matmul!(output, a, b)
# Synchronize the backend before comparing against the reference product `a * b`.
KernelAbstractions.synchronize(backend)
@show output ≈ a * b
@show output ≈ a * b
When I run this code on my laptop, I get:
output ≈ a * b = false
output ≈ a * b = true
[47667] signal 11 (1): Segmentation fault
in expression starting at none:0
_ZN3NEO13DrmAllocation15makeBOsResidentEPNS_9OsContextEjPSt6vectorIPNS_12BufferObjectESaIS5_EEb at /home/mose/.julia/artifacts/df06a45fdfc25a70826c358bdd37c510002509f2/lib/libze_intel_gpu.so.1 (unknown line)
_ZN3NEO24DrmCommandStreamReceiverINS_15XeHpgCoreFamilyEE16processResidencyERKSt6vectorIPNS_18GraphicsAllocationESaIS5_EEj at /home/mose/.julia/artifacts/df06a45fdfc25a70826c358bdd37c510002509f2/lib/libze_intel_gpu.so.1 (unknown line)
_ZN3NEO24DrmCommandStreamReceiverINS_15XeHpgCoreFamilyEE13flushInternalERKNS_11BatchBufferERKSt6vectorIPNS_18GraphicsAllocationESaIS8_EE at /home/mose/.julia/artifacts/df06a45fdfc25a70826c358bdd37c510002509f2/lib/libze_intel_gpu.so.1 (unknown line)
_ZN3NEO24DrmCommandStreamReceiverINS_15XeHpgCoreFamilyEE5flushERNS_11BatchBufferERSt6vectorIPNS_18GraphicsAllocationESaIS7_EE at /home/mose/.julia/artifacts/df06a45fdfc25a70826c358bdd37c510002509f2/lib/libze_intel_gpu.so.1 (unknown line)
_ZN3NEO21CommandStreamReceiver17submitBatchBufferERNS_11BatchBufferERSt6vectorIPNS_18GraphicsAllocationESaIS5_EE at /home/mose/.julia/artifacts/df06a45fdfc25a70826c358bdd37c510002509f2/lib/libze_intel_gpu.so.1 (unknown line)
_ZN2L015CommandQueueImp17submitBatchBufferEmRSt6vectorIPN3NEO18GraphicsAllocationESaIS4_EEPvb at /home/mose/.julia/artifacts/df06a45fdfc25a70826c358bdd37c510002509f2/lib/libze_intel_gpu.so.1 (unknown line)
_ZN2L014CommandQueueHwIL14GFXCORE_FAMILY3079EE26executeCommandListsRegularERNS2_27CommandListExecutionContextEjPP25_ze_command_list_handle_tP18_ze_fence_handle_tPN3NEO12LinearStreamE at /home/mose/.julia/artifacts/df06a45fdfc25a70826c358bdd37c510002509f2/lib/libze_intel_gpu.so.1 (unknown line)
_ZN2L014CommandQueueHwIL14GFXCORE_FAMILY3079EE19executeCommandListsEjPP25_ze_command_list_handle_tP18_ze_fence_handle_tbPN3NEO12LinearStreamE at /home/mose/.julia/artifacts/df06a45fdfc25a70826c358bdd37c510002509f2/lib/libze_intel_gpu.so.1 (unknown line)
_ZN25ur_queue_handle_legacy_t_18executeCommandListENSt8__detail14_Node_iteratorISt4pairIKP25_ze_command_list_handle_t22ur_command_list_info_tELb0ELb0EEEbb at /home/mose/.julia/artifacts/4a18fb08eef19534a609707234107ea4ab173392/lib/libur_adapter_level_zero.so.0 (unknown line)
_ZN25ur_queue_handle_legacy_t_12queueReleaseEv at /home/mose/.julia/artifacts/4a18fb08eef19534a609707234107ea4ab173392/lib/libur_adapter_level_zero.so.0 (unknown line)
urQueueRelease at /home/mose/.julia/artifacts/4a18fb08eef19534a609707234107ea4ab173392/lib/./libur_loader.so.0 (unknown line)
_ZN4sycl3_V16detail10queue_implD2Ev at /home/mose/.julia/artifacts/4a18fb08eef19534a609707234107ea4ab173392/lib/libsycl.so.8 (unknown line)
_M_release at /opt/x86_64-linux-gnu/x86_64-linux-gnu/include/c++/8.1.0/bits/shared_ptr_base.h:161 [inlined]
~__shared_count at /opt/x86_64-linux-gnu/x86_64-linux-gnu/include/c++/8.1.0/bits/shared_ptr_base.h:712 [inlined]
~__shared_ptr at /opt/x86_64-linux-gnu/x86_64-linux-gnu/include/c++/8.1.0/bits/shared_ptr_base.h:1151 [inlined]
~shared_ptr at /opt/x86_64-linux-gnu/x86_64-linux-gnu/include/c++/8.1.0/bits/shared_ptr.h:103 [inlined]
~queue at /workspace/x86_64-linux-gnu-libgfortran5-cxx11/destdir/include/sycl/queue.hpp:110 [inlined]
~syclQueue_st at /workspace/srcdir/oneAPI.jl/deps/src/sycl.hpp:19 [inlined]
syclQueueDestroy at /workspace/srcdir/oneAPI.jl/deps/src/sycl.cpp:75
syclQueueDestroy at /home/mose/.julia/packages/oneAPI/CNvkW/lib/support/liboneapi_support.jl:58 [inlined]
#7 at /home/mose/.julia/packages/oneAPI/CNvkW/lib/sycl/SYCL.jl:74
unknown function (ip: 0x7f27693f9092)
run_finalizer at /cache/build/builder-demeter6-3/julialang/julia-release-1-dot-11/src/gc.c:299
jl_gc_run_finalizers_in_list at /cache/build/builder-demeter6-3/julialang/julia-release-1-dot-11/src/gc.c:389
run_finalizers at /cache/build/builder-demeter6-3/julialang/julia-release-1-dot-11/src/gc.c:435
ijl_atexit_hook at /cache/build/builder-demeter6-3/julialang/julia-release-1-dot-11/src/init.c:299
jl_repl_entrypoint at /cache/build/builder-demeter6-3/julialang/julia-release-1-dot-11/src/jlapi.c:1060
main at /cache/build/builder-demeter6-3/julialang/julia-release-1-dot-11/cli/loader_exe.c:58
unknown function (ip: 0x7f277059fd67)
__libc_start_main at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
unknown function (ip: 0x4010b8)
Allocations: 28785806 (Pool: 28784960; Big: 846); GC: 19
This is quite reproducible: almost every time, the first comparison reports that the result is not approximately equal to the reference, the second comparison passes, and then Julia segfaults at exit. There is no problem when using the CUDA backend of KernelAbstractions.
julia> versioninfo()
Julia Version 1.11.3
Commit d63adeda50d (2025-01-21 19:42 UTC)
Build Info:
Official https://julialang.org/ release
Platform Info:
OS: Linux (x86_64-linux-gnu)
CPU: 22 × Intel(R) Core(TM) Ultra 7 155H
WORD_SIZE: 64
LLVM: libLLVM-16.0.6 (ORCJIT, alderlake)
Threads: 1 default, 0 interactive, 1 GC (on 22 virtual cores)
julia> oneAPI.versioninfo()
Binary dependencies:
- NEO: 24.26.30049+0
- libigc: 1.0.17193+0
- gmmlib: 22.3.20+0
- SPIRV_LLVM_Translator_unified: 0.7.0+0
- SPIRV_Tools: 2024.3.0+0
Toolchain:
- Julia: 1.11.3
- LLVM: 16.0.6
1 driver:
- 00000000-0000-0000-1823-cb6d01037561 (v1.3.30049, API v1.3.0)
1 device:
- Intel(R) Arc(TM) Graphics
CC: @vchuravy.
Metadata
Metadata
Assignees
Labels
No labels