From dce672dbd5e0bb0247130c1c1b4ad3b7dd48d6c5 Mon Sep 17 00:00:00 2001
From: Valentin Churavy
Date: Fri, 14 Feb 2025 10:24:07 +0100
Subject: [PATCH] Fix indicies->indices typo everywhere

---
 docs/src/design.md        |  2 +-
 docs/src/index.md         |  4 ++--
 src/KernelAbstractions.jl | 14 +++++++-------
 src/macros.jl             | 30 +++++++++++++++---------------
 test/localmem.jl          |  4 ++--
 5 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/docs/src/design.md b/docs/src/design.md
index 1fc774d71..df2d60b79 100644
--- a/docs/src/design.md
+++ b/docs/src/design.md
@@ -8,7 +8,7 @@
 
 - `ldg` on the GPU
 - `@aliasscopes` on the CPU
-- Cartesian or Linear indicies supported
+- Cartesian or Linear indices supported
 - `@index(Linear)
 - `@index(Cartesian)
 - `@synchronize` for inserting workgroup-level synchronization
diff --git a/docs/src/index.md b/docs/src/index.md
index f8bb81f58..164f6c637 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -58,7 +58,7 @@ The `CPU` backend always had this limitation and upon investigation the CUDA bac
 but allows for a wider set of valid kernels.
 This highlighted a design flaw in KernelAbstractions.
 Most GPU implementations execute KernelAbstraction workgroups on static blocks
-This means a kernel with `ndrange=(32, 30)` might be executed on a static block of `(32,32)`. In order to block these extra indicies,
+This means a kernel with `ndrange=(32, 30)` might be executed on a static block of `(32,32)`. In order to block these extra indices,
 KernelAbstraction would insert a dynamic boundscheck.
 
 Prior to v0.9.34 a kernel like
@@ -118,7 +118,7 @@ Since this transformation can be disruptive, user can now opt out of the implici
 but users must avoid the use of `@index(Global)` and instead use their own derivation based on `@index(Group)` and `@index(Local)`.
 
 ```julia
-@kernel unsafe_indicies=true function localmem(A)
+@kernel unsafe_indices=true function localmem(A)
     N = @uniform prod(@groupsize())
     gI = @index(Group, Linear)
     i = @index(Local, Linear)
diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl
index 997ab7709..582a61e9d 100644
--- a/src/KernelAbstractions.jl
+++ b/src/KernelAbstractions.jl
@@ -50,7 +50,7 @@ synchronize(backend)
 ```
 """
 macro kernel(expr)
-    return __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false, #=unsafe_indicies=# false)
+    return __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false, #=unsafe_indices=# false)
 end
 
 """
@@ -60,7 +60,7 @@ This allows for two different configurations:
 
 1. `cpu={true, false}`: Disables code-generation of the CPU function. This relaxes semantics such that KernelAbstractions primitives can be used in non-kernel functions.
 2. `inbounds={false, true}`: Enables a forced `@inbounds` macro around the function definition in the case the user is using too many `@inbounds` already in their kernel. Note that this can lead to incorrect results, crashes, etc and is fundamentally unsafe. Be careful!
-3. `unsafe_indicies={false, true}`: Disables the implicit validation of indicies, users must avoid `@index(Global)`.
+3. `unsafe_indices={false, true}`: Disables the implicit validation of indices, users must avoid `@index(Global)`.
 
 - [`@context`](@ref)
 
@@ -72,7 +72,7 @@ macro kernel(ex...)
         return __kernel(ex[1], true, false, false)
     else
         generate_cpu = true
-        unsafe_indicies = false
+        unsafe_indices = false
         force_inbounds = false
         for i in 1:(length(ex) - 1)
             if ex[i] isa Expr && ex[i].head == :(=) &&
@@ -82,19 +82,19 @@ macro kernel(ex...)
                     ex[i].args[1] == :inbounds && ex[i].args[2] isa Bool
                 force_inbounds = ex[i].args[2]
             elseif ex[i] isa Expr && ex[i].head == :(=) &&
-                    ex[i].args[1] == :unsafe_indicies && ex[i].args[2] isa Bool
-                unsafe_indicies = ex[i].args[2]
+                    ex[i].args[1] == :unsafe_indices && ex[i].args[2] isa Bool
+                unsafe_indices = ex[i].args[2]
             else
                 error(
                     "Configuration should be of form:\n" *
                         "* `cpu=false`\n" *
                         "* `inbounds=true`\n" *
-                        "* `unsafe_indicies=true`\n" *
+                        "* `unsafe_indices=true`\n" *
                         "got `", ex[i], "`",
                 )
             end
         end
-        return __kernel(ex[end], generate_cpu, force_inbounds, unsafe_indicies)
+        return __kernel(ex[end], generate_cpu, force_inbounds, unsafe_indices)
     end
 end
 
diff --git a/src/macros.jl b/src/macros.jl
index fc8ddca8c..c83d60308 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -10,7 +10,7 @@ function find_return(stmt)
 end
 
 # XXX: Proper errors
-function __kernel(expr, generate_cpu = true, force_inbounds = false, unsafe_indicies = false)
+function __kernel(expr, generate_cpu = true, force_inbounds = false, unsafe_indices = false)
     def = splitdef(expr)
     name = def[:name]
     args = def[:args]
@@ -46,7 +46,7 @@ function __kernel(expr, generate_cpu = true, force_inbounds = false, unsafe_indi
 
     def_gpu = deepcopy(def)
     def_gpu[:name] = gpu_name = Symbol(:gpu_, name)
-    transform_gpu!(def_gpu, constargs, force_inbounds, unsafe_indicies)
+    transform_gpu!(def_gpu, constargs, force_inbounds, unsafe_indices)
     gpu_function = combinedef(def_gpu)
 
     # create constructor functions
@@ -78,7 +78,7 @@ end
 
 # The easy case, transform the function for GPU execution
 # - mark constant arguments by applying `constify`.
-function transform_gpu!(def, constargs, force_inbounds, unsafe_indicies)
+function transform_gpu!(def, constargs, force_inbounds, unsafe_indices)
     let_constargs = Expr[]
     for (i, arg) in enumerate(def[:args])
         if constargs[i]
@@ -89,13 +89,13 @@ function transform_gpu!(def, constargs, force_inbounds, unsafe_indicies)
     new_stmts = Expr[]
     body = MacroTools.flatten(def[:body])
     push!(new_stmts, Expr(:aliasscope))
-    if !unsafe_indicies
+    if !unsafe_indices
         push!(new_stmts, :(__active_lane__ = $__validindex(__ctx__)))
     end
     if force_inbounds
         push!(new_stmts, Expr(:inbounds, true))
     end
-    if !unsafe_indicies
+    if !unsafe_indices
         append!(new_stmts, split(emit_gpu, body.args))
     else
         push!(new_stmts, body)
@@ -117,7 +117,7 @@ end
 # - mark constant arguments by applying `constify`.
 # - insert aliasscope markers
 # - insert implied loop bodys
-# - handle indicies
+# - handle indices
 # - hoist workgroup definitions
 # - hoist uniform variables
 function transform_cpu!(def, constargs, force_inbounds)
@@ -149,7 +149,7 @@ function transform_cpu!(def, constargs, force_inbounds)
 end
 
 struct WorkgroupLoop
-    indicies::Vector{Any}
+    indices::Vector{Any}
     stmts::Vector{Any}
     allocations::Vector{Any}
     private_allocations::Vector{Any}
@@ -177,7 +177,7 @@ end
 
 function split(
         emit, stmts,
-        indicies = Any[], private = Set{Symbol}(),
+        indices = Any[], private = Set{Symbol}(),
     )
     # 1. Split the code into blocks separated by `@synchronize`
     # 2. Aggregate `@index` expressions
@@ -191,7 +191,7 @@ function split(
     for stmt in stmts
         has_sync = find_sync(stmt)
         if has_sync
-            loop = WorkgroupLoop(deepcopy(indicies), current, allocations, private_allocations, deepcopy(private), is_sync(stmt))
+            loop = WorkgroupLoop(deepcopy(indices), current, allocations, private_allocations, deepcopy(private), is_sync(stmt))
             push!(new_stmts, emit(loop))
             allocations = Any[]
             private_allocations = Any[]
@@ -206,7 +206,7 @@ function split(
     function recurse(expr::Expr)
         expr = unblock(expr)
         if is_scope_construct(expr) && any(find_sync, expr.args)
-            new_args = unblock(split(emit, expr.args, deepcopy(indicies), deepcopy(private)))
+            new_args = unblock(split(emit, expr.args, deepcopy(indices), deepcopy(private)))
             return Expr(expr.head, new_args...)
         else
             return Expr(expr.head, map(recurse, expr.args)...)
@@ -225,7 +225,7 @@ function split(
             continue
         elseif @capture(stmt, lhs_ = rhs_ | (vs__, lhs_ = rhs_))
             if @capture(rhs, @index(args__))
-                push!(indicies, stmt)
+                push!(indices, stmt)
                 continue
             elseif @capture(rhs, @localmem(args__) | @uniform(args__))
                 push!(allocations, stmt)
@@ -249,7 +249,7 @@ function split(
 
     # everything since the last `@synchronize`
     if !isempty(current)
-        loop = WorkgroupLoop(deepcopy(indicies), current, allocations, private_allocations, deepcopy(private), false)
+        loop = WorkgroupLoop(deepcopy(indices), current, allocations, private_allocations, deepcopy(private), false)
         push!(new_stmts, emit(loop))
     end
     return new_stmts
@@ -257,7 +257,7 @@ end
 
 function emit_cpu(loop)
     idx = gensym(:I)
-    for stmt in loop.indicies
+    for stmt in loop.indices
         # splice index into the i = @index(Cartesian, $idx)
         @assert stmt.head === :(=)
         rhs = stmt.args[2]
@@ -300,7 +300,7 @@ function emit_cpu(loop)
     loopexpr = quote
         for $idx in $__workitems_iterspace(__ctx__)
             $__validindex(__ctx__, $idx) || continue
-            $(loop.indicies...)
+            $(loop.indices...)
             $(unblock(body))
         end
     end
@@ -318,7 +318,7 @@ function emit_gpu(loop)
         $(loop.allocations...)
         $(loop.private_allocations...)
         if __active_lane__
-            $(loop.indicies...)
+            $(loop.indices...)
             $(unblock(body))
         end
     end
diff --git a/test/localmem.jl b/test/localmem.jl
index fe47d2a5a..9a34d97c6 100644
--- a/test/localmem.jl
+++ b/test/localmem.jl
@@ -34,7 +34,7 @@ end
     end
 end
 
-@kernel unsafe_indicies = true function localmem_unsafe_indicies(A)
+@kernel unsafe_indices = true function localmem_unsafe_indices(A)
     N = @uniform prod(@groupsize())
     gI = @index(Group, Linear)
     i = @index(Local, Linear)
@@ -49,7 +49,7 @@ end
 
 function localmem_testsuite(backend, ArrayT)
     @testset "kernels" begin
-        @testset for kernel! in (localmem(backend(), 16), localmem2(backend(), 16), localmem_unsafe_indices(backend(), 16))
            A = ArrayT{Int}(undef, 64)
            kernel!(A, ndrange = size(A))
            synchronize(backend())