Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
b90d7e9
simplify if condition in examples
omlins Sep 16, 2025
d68cd72
add macro signatures and docstrings
omlins Sep 16, 2025
2bc27ac
add warp primitives tests
omlins Sep 17, 2025
2afdec3
add warp primitives tests
omlins Sep 17, 2025
7b59472
add warp primitives tests
omlins Sep 17, 2025
d2ea2cc
add warp level primitives
omlins Sep 17, 2025
ffbe50d
fix CPU target functions
omlins Sep 24, 2025
6c3a400
add test set for metal
omlins Sep 24, 2025
a006989
add missing exports
omlins Sep 24, 2025
5af959e
rename kernel language file to memopt
omlins Sep 26, 2025
313abed
move kernel language wrappers to the corresponding file
omlins Sep 26, 2025
ca17bb5
move allocator wrappers to the corresponding file
omlins Sep 26, 2025
3b7d253
move allocator wrappers to the corresponding file
omlins Sep 26, 2025
2da9f45
move hide communication to the corresponding file
omlins Sep 26, 2025
37f5a05
move hide communication to the corresponding file
omlins Sep 26, 2025
59c581b
move hide communication to the corresponding file
omlins Sep 26, 2025
042af79
move synchronization to the corresponding file
omlins Sep 26, 2025
70dbb79
add warp level primitives to module docstring
omlins Oct 29, 2025
660829c
add warp level primitives to module docstring
omlins Oct 29, 2025
53dabe4
add pass through macros to parallel kernel
omlins Oct 29, 2025
49d19b7
add parallel stencil kernel language tests
omlins Oct 29, 2025
d6b9f84
add parallel stencil kernel language tests
omlins Oct 31, 2025
74020cd
add parallel stencil kernel language tests
omlins Oct 31, 2025
22c0bb5
add parallel stencil kernel language tests
omlins Oct 31, 2025
fdee3df
add parallel stencil kernel language tests
omlins Oct 31, 2025
da3a60d
replace allocated with custom macro
omlins Nov 3, 2025
1d571ce
improve test runner
omlins Nov 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/diffusion2D_shmem_novis.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ end
ty = @threadIdx().y + 1
T_l = @sharedMem(eltype(T), (@blockDim().x+2, @blockDim().y+2))
T_l[tx,ty] = T[ix,iy]
if (ix>1 && ix<size(T2,1) && iy>1 && iy<size(T2,2))
if (1<ix<size(T2,1) && 1<iy<size(T2,2))
if (@threadIdx().x == 1) T_l[tx-1,ty] = T[ix-1,iy] end
if (@threadIdx().x == @blockDim().x) T_l[tx+1,ty] = T[ix+1,iy] end
if (@threadIdx().y == 1) T_l[tx,ty-1] = T[ix,iy-1] end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ else
end

@parallel_indices (ix,iy,iz) function diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz)
if (ix>1 && ix<size(T2,1) && iy>1 && iy<size(T2,2) && iz>1 && iz<size(T2,3))
if (1<ix<size(T2,1) && 1<iy<size(T2,2) && 1<iz<size(T2,3))
T2[ix,iy,iz] = T[ix,iy,iz] + dt*(Ci[ix,iy,iz]*(
- ((-lam*(T[ix+1,iy,iz] - T[ix,iy,iz])*_dx) - (-lam*(T[ix,iy,iz] - T[ix-1,iy,iz])*_dx))*_dx
- ((-lam*(T[ix,iy+1,iz] - T[ix,iy,iz])*_dy) - (-lam*(T[ix,iy,iz] - T[ix,iy-1,iz])*_dy))*_dy
Expand Down
12 changes: 12 additions & 0 deletions src/ParallelKernel/ParallelKernel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ Enables writing parallel high-performance kernels and whole applications that ca
- [`@threadIdx`](@ref)
- [`@sync_threads`](@ref)
- [`@sharedMem`](@ref)
!!! note "Warp-level primitives"
- [`@warpsize`](@ref)
- [`@laneid`](@ref)
- [`@active_mask`](@ref)
- [`@shfl_sync`](@ref)
- [`@shfl_up_sync`](@ref)
- [`@shfl_down_sync`](@ref)
- [`@shfl_xor_sync`](@ref)
- [`@vote_any_sync`](@ref)
- [`@vote_all_sync`](@ref)
- [`@vote_ballot_sync`](@ref)

# Submodules
- [`ParallelKernel.AD`](@ref)
Expand Down Expand Up @@ -74,6 +85,7 @@ include("FieldAllocators.jl")
## Exports
export @init_parallel_kernel, @parallel, @hide_communication, @parallel_indices, @parallel_async, @synchronize, @zeros, @ones, @rand, @falses, @trues, @fill, @fill!, @CellType
export @gridDim, @blockIdx, @blockDim, @threadIdx, @sync_threads, @sharedMem, @pk_show, @pk_println, @∀
export @warpsize, @laneid, @active_mask, @shfl_sync, @shfl_up_sync, @shfl_down_sync, @shfl_xor_sync, @vote_any_sync, @vote_all_sync, @vote_ballot_sync
export PKNumber

end # Module ParallelKernel
304 changes: 304 additions & 0 deletions src/ParallelKernel/kernel_language.jl

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions src/ParallelStencil.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ https://github.com/omlins/ParallelStencil.jl
- [`@threadIdx`](@ref)
- [`@sync_threads`](@ref)
- [`@sharedMem`](@ref)
!!! note "Warp-level primitives"
- [`@warpsize`](@ref)
- [`@laneid`](@ref)
- [`@active_mask`](@ref)
- [`@shfl_sync`](@ref)
- [`@shfl_up_sync`](@ref)
- [`@shfl_down_sync`](@ref)
- [`@shfl_xor_sync`](@ref)
- [`@vote_any_sync`](@ref)
- [`@vote_all_sync`](@ref)
- [`@vote_ballot_sync`](@ref)

# Submodules
- [`ParallelStencil.AD`](@ref)
Expand Down Expand Up @@ -60,8 +71,11 @@ using .ParallelKernel.Exceptions
include("shared.jl")

## Alphabetical include of function files
include("allocators.jl")
include("hide_communication.jl")
include("init_parallel_stencil.jl")
include("kernel_language.jl")
include("memopt.jl")
include("parallel.jl")
include("reset_parallel_stencil.jl")

Expand All @@ -74,6 +88,7 @@ include("FiniteDifferences.jl")
export @init_parallel_stencil, FiniteDifferences1D, FiniteDifferences2D, FiniteDifferences3D, AD
export @parallel, @hide_communication, @parallel_indices, @parallel_async, @synchronize, @zeros, @ones, @rand, @falses, @trues, @fill, @fill!, @CellType
export @gridDim, @blockIdx, @blockDim, @threadIdx, @sync_threads, @sharedMem, @ps_show, @ps_println, @∀
export @warpsize, @laneid, @active_mask, @shfl_sync, @shfl_up_sync, @shfl_down_sync, @shfl_xor_sync, @vote_any_sync, @vote_all_sync, @vote_ballot_sync
export PSNumber

end # Module ParallelStencil
8 changes: 8 additions & 0 deletions src/allocators.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# ParallelStencil wrapper around `ParallelKernel.@zeros`.
# The docstring is ParallelKernel's `ZEROS_DOC` with "@init_parallel_kernel" replaced by
# "@init_parallel_stencil"; all macro arguments are forwarded unchanged.
@doc replace(ParallelKernel.ZEROS_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro zeros(args...)
    # `check_initialized` is defined outside this view; presumably it errors when the
    # calling module has not run `@init_parallel_stencil` -- confirm against its definition.
    check_initialized(__module__)
    forwarded = :(ParallelStencil.ParallelKernel.@zeros($(args...)))
    # Escape the whole call so it is expanded and evaluated in the caller's scope.
    return esc(forwarded)
end
# ParallelStencil wrapper around `ParallelKernel.@ones`.
# The docstring is ParallelKernel's `ONES_DOC` with "@init_parallel_kernel" replaced by
# "@init_parallel_stencil"; all macro arguments are forwarded unchanged.
@doc replace(ParallelKernel.ONES_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro ones(args...)
    # `check_initialized` is defined outside this view; presumably it errors when the
    # calling module has not run `@init_parallel_stencil` -- confirm against its definition.
    check_initialized(__module__)
    forwarded = :(ParallelStencil.ParallelKernel.@ones($(args...)))
    # Escape the whole call so it is expanded and evaluated in the caller's scope.
    return esc(forwarded)
end
# ParallelStencil wrapper around `ParallelKernel.@rand`.
# The docstring is ParallelKernel's `RAND_DOC` with "@init_parallel_kernel" replaced by
# "@init_parallel_stencil"; all macro arguments are forwarded unchanged.
@doc replace(ParallelKernel.RAND_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro rand(args...)
    # `check_initialized` is defined outside this view; presumably it errors when the
    # calling module has not run `@init_parallel_stencil` -- confirm against its definition.
    check_initialized(__module__)
    forwarded = :(ParallelStencil.ParallelKernel.@rand($(args...)))
    # Escape the whole call so it is expanded and evaluated in the caller's scope.
    return esc(forwarded)
end
# ParallelStencil wrapper around `ParallelKernel.@falses`.
# The docstring is ParallelKernel's `FALSES_DOC` with "@init_parallel_kernel" replaced by
# "@init_parallel_stencil"; all macro arguments are forwarded unchanged.
@doc replace(ParallelKernel.FALSES_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro falses(args...)
    # `check_initialized` is defined outside this view; presumably it errors when the
    # calling module has not run `@init_parallel_stencil` -- confirm against its definition.
    check_initialized(__module__)
    forwarded = :(ParallelStencil.ParallelKernel.@falses($(args...)))
    # Escape the whole call so it is expanded and evaluated in the caller's scope.
    return esc(forwarded)
end
# ParallelStencil wrapper around `ParallelKernel.@trues`.
# The docstring is ParallelKernel's `TRUES_DOC` with "@init_parallel_kernel" replaced by
# "@init_parallel_stencil"; all macro arguments are forwarded unchanged.
@doc replace(ParallelKernel.TRUES_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro trues(args...)
    # `check_initialized` is defined outside this view; presumably it errors when the
    # calling module has not run `@init_parallel_stencil` -- confirm against its definition.
    check_initialized(__module__)
    forwarded = :(ParallelStencil.ParallelKernel.@trues($(args...)))
    # Escape the whole call so it is expanded and evaluated in the caller's scope.
    return esc(forwarded)
end
# ParallelStencil wrapper around `ParallelKernel.@fill`.
# The docstring is ParallelKernel's `FILL_DOC` with "@init_parallel_kernel" replaced by
# "@init_parallel_stencil"; all macro arguments are forwarded unchanged.
@doc replace(ParallelKernel.FILL_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro fill(args...)
    # `check_initialized` is defined outside this view; presumably it errors when the
    # calling module has not run `@init_parallel_stencil` -- confirm against its definition.
    check_initialized(__module__)
    forwarded = :(ParallelStencil.ParallelKernel.@fill($(args...)))
    # Escape the whole call so it is expanded and evaluated in the caller's scope.
    return esc(forwarded)
end
# ParallelStencil wrapper around `ParallelKernel.@fill!`.
# The docstring is ParallelKernel's `FILL!_DOC` with "@init_parallel_kernel" replaced by
# "@init_parallel_stencil"; all macro arguments are forwarded unchanged.
@doc replace(ParallelKernel.FILL!_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro fill!(args...)
    # `check_initialized` is defined outside this view; presumably it errors when the
    # calling module has not run `@init_parallel_stencil` -- confirm against its definition.
    check_initialized(__module__)
    forwarded = :(ParallelStencil.ParallelKernel.@fill!($(args...)))
    # Escape the whole call so it is expanded and evaluated in the caller's scope.
    return esc(forwarded)
end
# ParallelStencil wrapper around `ParallelKernel.@CellType`.
# The docstring is ParallelKernel's `CELLTYPE_DOC` with "@init_parallel_kernel" replaced by
# "@init_parallel_stencil"; all macro arguments are forwarded unchanged.
@doc replace(ParallelKernel.CELLTYPE_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro CellType(args...)
    # `check_initialized` is defined outside this view; presumably it errors when the
    # calling module has not run `@init_parallel_stencil` -- confirm against its definition.
    check_initialized(__module__)
    forwarded = :(ParallelStencil.ParallelKernel.@CellType($(args...)))
    # Escape the whole call so it is expanded and evaluated in the caller's scope.
    return esc(forwarded)
end
1 change: 1 addition & 0 deletions src/hide_communication.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# ParallelStencil wrapper around `ParallelKernel.@hide_communication`.
# The docstring is ParallelKernel's `HIDE_COMMUNICATION_DOC` with "@init_parallel_kernel"
# replaced by "@init_parallel_stencil"; all macro arguments are forwarded unchanged.
@doc replace(ParallelKernel.HIDE_COMMUNICATION_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro hide_communication(args...)
    # `check_initialized` is defined outside this view; presumably it errors when the
    # calling module has not run `@init_parallel_stencil` -- confirm against its definition.
    check_initialized(__module__)
    forwarded = :(ParallelStencil.ParallelKernel.@hide_communication($(args...)))
    # Escape the whole call so it is expanded and evaluated in the caller's scope.
    return esc(forwarded)
end
22 changes: 0 additions & 22 deletions src/init_parallel_stencil.jl
Original file line number Diff line number Diff line change
@@ -1,25 +1,3 @@
# NOTE: @parallel and @parallel_indices and @parallel_async do not appear in the following as they are extended and therefore defined in parallel.jl
@doc replace(ParallelKernel.HIDE_COMMUNICATION_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro hide_communication(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@hide_communication($(args...)))); end
@doc replace(ParallelKernel.ZEROS_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro zeros(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@zeros($(args...)))); end
@doc replace(ParallelKernel.ONES_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro ones(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@ones($(args...)))); end
@doc replace(ParallelKernel.RAND_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro rand(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@rand($(args...)))); end
@doc replace(ParallelKernel.FALSES_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro falses(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@falses($(args...)))); end
@doc replace(ParallelKernel.TRUES_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro trues(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@trues($(args...)))); end
@doc replace(ParallelKernel.FILL_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro fill(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@fill($(args...)))); end
@doc replace(ParallelKernel.FILL!_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro fill!(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@fill!($(args...)))); end
@doc replace(ParallelKernel.CELLTYPE_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro CellType(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@CellType($(args...)))); end
@doc replace(ParallelKernel.SYNCHRONIZE_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro synchronize(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@synchronize($(args...)))); end
@doc replace(ParallelKernel.GRIDDIM_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro gridDim(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@gridDim($(args...)))); end
@doc replace(ParallelKernel.BLOCKIDX_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro blockIdx(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@blockIdx($(args...)))); end
@doc replace(ParallelKernel.BLOCKDIM_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro blockDim(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@blockDim($(args...)))); end
@doc replace(ParallelKernel.THREADIDX_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro threadIdx(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@threadIdx($(args...)))); end
@doc replace(ParallelKernel.SYNCTHREADS_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro sync_threads(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@sync_threads($(args...)))); end
@doc replace(ParallelKernel.SHAREDMEM_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro sharedMem(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@sharedMem($(args...)))); end
@doc replace(ParallelKernel.FORALL_DOC, "@init_parallel_kernel" => "@init_parallel_stencil") macro ∀(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@∀($(args...)))); end
@doc replace(replace(ParallelKernel.PKSHOW_DOC, "@init_parallel_kernel" => "@init_parallel_stencil"), "pk_show" => "ps_show") macro ps_show(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@pk_show($(args...)))); end
@doc replace(replace(ParallelKernel.PKPRINTLN_DOC, "@init_parallel_kernel" => "@init_parallel_stencil"), "pk_println" => "ps_println") macro ps_println(args...) check_initialized(__module__); esc(:(ParallelStencil.ParallelKernel.@pk_println($(args...)))); end


"""
@init_parallel_stencil(package, numbertype, ndims)
@init_parallel_stencil(package, numbertype, ndims, inbounds=...)
Expand Down
Loading
Loading