diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 00000000..e20c1597
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,34 @@
+name: CI
+
+on:
+  push:
+    branches:
+      - master
+    tags: '*'
+  pull_request:
+    types: [opened, synchronize, reopened]
+  schedule:
+    - cron: '0 0 * * 0'
+
+jobs:
+  self-runner:
+    continue-on-error: true
+    env:
+      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+    runs-on: [self-hosted, linux, X64]
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        julia-version: ['1']
+        julia-arch: [x64]
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@latest
+        with:
+          version: ${{ matrix.julia-version }}
+      - uses: julia-actions/cache@v2
+      - uses: julia-actions/julia-buildpkg@latest
+        continue-on-error: true
+      - uses: julia-actions/julia-runtest@latest
+        continue-on-error: true
diff --git a/src/array.jl b/src/array.jl
index d602f8d9..4d621cf2 100644
--- a/src/array.jl
+++ b/src/array.jl
@@ -505,8 +505,24 @@ fill(v, dims...) = fill!(oneArray{typeof(v)}(undef, dims...), v)
 fill(v, dims::Dims) = fill!(oneArray{typeof(v)}(undef, dims...), v)
 
 function Base.fill!(A::oneDenseArray{T}, val) where T
-  B = [convert(T, val)]
-  unsafe_fill!(context(A), device(), pointer(A), pointer(B), length(A))
+  # Nothing to write for an empty array.
+  length(A) == 0 && return A
+  val = convert(T, val)
+  # Zero-size element types have no bytes to fill.
+  sizeof(T) == 0 && return A
+
+  # The fill is enqueued asynchronously, so the pattern is staged in a USM host
+  # allocation that must stay alive until the queue has been synchronized.
+  # try/finally releases the staging buffer even if the fill or the
+  # synchronization throws, so a failed fill does not leak the allocation.
+  buf = oneL0.host_alloc(context(A), sizeof(T), Base.datatype_alignment(T))
+  try
+    unsafe_store!(convert(Ptr{T}, buf), val)
+    unsafe_fill!(context(A), device(), pointer(A), convert(ZePtr{T}, buf), length(A))
+    synchronize(global_queue(context(A), device()))
+  finally
+    oneL0.free(buf)
+  end
   A
 end
 
diff --git a/test/level-zero.jl b/test/level-zero.jl
index 63616eff..ed7b2838 100644
--- a/test/level-zero.jl
+++ b/test/level-zero.jl
@@ -271,13 +271,23 @@ let src = rand(Int, 1024)
     synchronize(queue)
     @test chk == src
 
+    # FIX: Allocate pattern in USM Host Memory
+    # Standard Host memory (stack/heap) is not accessible by discrete GPUs for fill patterns.
+    # We must use USM Host Memory.
+    pattern_val = 42
+    pattern_buf = oneL0.host_alloc(ctx, sizeof(Int), Base.datatype_alignment(Int))
+    unsafe_store!(convert(Ptr{Int}, pattern_buf), pattern_val)
+
     execute!(queue) do list
-        pattern = [42]
-        append_fill!(list, pointer(dst), pointer(pattern), sizeof(pattern), sizeof(src))
+        # Use the USM pointer (converted to ZePtr)
+        append_fill!(list, pointer(dst), convert(ZePtr{Int}, pattern_buf), sizeof(Int), sizeof(src))
         append_barrier!(list)
         append_copy!(list, pointer(chk), pointer(dst), sizeof(src))
     end
     synchronize(queue)
+
+    oneL0.free(pattern_buf)
+
     @test all(isequal(42), chk)
 
     free(dst)