2 changes: 1 addition & 1 deletion docs/src/design.md
@@ -8,7 +8,7 @@
- `ldg` on the GPU
- `@aliasscopes` on the CPU

-- Cartesian or Linear indicies supported
+- Cartesian or Linear indices supported
  - `@index(Linear)`
  - `@index(Cartesian)`
- `@synchronize` for inserting workgroup-level synchronization
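For context (not part of this diff), the two indexing styles listed in `design.md` are used as follows; a minimal sketch with illustrative kernel names:

```julia
using KernelAbstractions

# Linear: a single flat index into the ndrange.
@kernel function fill_linear!(A)
    i = @index(Global, Linear)
    @inbounds A[i] = i
end

# Cartesian: a CartesianIndex matching the shape of the ndrange.
@kernel function fill_cartesian!(A)
    I = @index(Global, Cartesian)
    @inbounds A[I] = sum(Tuple(I))
end
```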
4 changes: 2 additions & 2 deletions docs/src/index.md
@@ -58,7 +58,7 @@ The `CPU` backend always had this limitation and upon investigation the CUDA bac
but allows for a wider set of valid kernels.

This highlighted a design flaw in KernelAbstractions. Most GPU implementations execute KernelAbstractions workgroups on static blocks.
-This means a kernel with `ndrange=(32, 30)` might be executed on a static block of `(32,32)`. In order to block these extra indicies,
+This means a kernel with `ndrange=(32, 30)` might be executed on a static block of `(32,32)`. In order to block these extra indices,
KernelAbstractions would insert a dynamic boundscheck.

Prior to v0.9.34 a kernel like
@@ -118,7 +118,7 @@ Since this transformation can be disruptive, user can now opt out of the implici
but users must avoid the use of `@index(Global)` and instead use their own derivation based on `@index(Group)` and `@index(Local)`.

```julia
-@kernel unsafe_indicies=true function localmem(A)
+@kernel unsafe_indices=true function localmem(A)
N = @uniform prod(@groupsize())
gI = @index(Group, Linear)
i = @index(Local, Linear)
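The `localmem` kernel above is cut off by the diff context. As a self-contained sketch of the same pattern (the kernel name and body are illustrative, not from this PR), the global index is derived from `@index(Group)` and `@index(Local)`, and the tail of the `ndrange` is masked by hand:

```julia
using KernelAbstractions

@kernel unsafe_indices = true function scale!(A, factor)
    N = @uniform prod(@groupsize())
    gI = @index(Group, Linear)
    i = @index(Local, Linear)
    idx = (gI - 1) * N + i    # manual replacement for @index(Global, Linear)
    if idx <= length(A)       # manual replacement for the implicit boundscheck
        @inbounds A[idx] *= factor
    end
end
```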
14 changes: 7 additions & 7 deletions src/KernelAbstractions.jl
@@ -50,7 +50,7 @@ synchronize(backend)
```
"""
macro kernel(expr)
-return __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false, #=unsafe_indicies=# false)
+return __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false, #=unsafe_indices=# false)
end

"""
@@ -60,7 +60,7 @@ This allows for two different configurations:

1. `cpu={true, false}`: Disables code-generation of the CPU function. This relaxes semantics such that KernelAbstractions primitives can be used in non-kernel functions.
2. `inbounds={false, true}`: Enables a forced `@inbounds` macro around the function definition in the case the user is using too many `@inbounds` already in their kernel. Note that this can lead to incorrect results, crashes, etc and is fundamentally unsafe. Be careful!
-3. `unsafe_indicies={false, true}`: Disables the implicit validation of indicies, users must avoid `@index(Global)`.
+3. `unsafe_indices={false, true}`: Disables the implicit validation of indices, users must avoid `@index(Global)`.

- [`@context`](@ref)

@@ -72,7 +72,7 @@ macro kernel(ex...)
return __kernel(ex[1], true, false, false)
else
generate_cpu = true
-unsafe_indicies = false
+unsafe_indices = false
force_inbounds = false
for i in 1:(length(ex) - 1)
if ex[i] isa Expr && ex[i].head == :(=) &&
@@ -82,19 +82,19 @@ macro kernel(ex...)
ex[i].args[1] == :inbounds && ex[i].args[2] isa Bool
force_inbounds = ex[i].args[2]
elseif ex[i] isa Expr && ex[i].head == :(=) &&
-ex[i].args[1] == :unsafe_indicies && ex[i].args[2] isa Bool
-unsafe_indicies = ex[i].args[2]
+ex[i].args[1] == :unsafe_indices && ex[i].args[2] isa Bool
+unsafe_indices = ex[i].args[2]
else
error(
"Configuration should be of form:\n" *
"* `cpu=false`\n" *
"* `inbounds=true`\n" *
"* `unsafe_indicies=true`\n" *
"* `unsafe_indices=true`\n" *
"got `", ex[i], "`",
)
end
end
-return __kernel(ex[end], generate_cpu, force_inbounds, unsafe_indicies)
+return __kernel(ex[end], generate_cpu, force_inbounds, unsafe_indices)
end
end

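The parsing loop above accepts the three flags in any order ahead of the kernel definition. A hedged usage sketch (kernel name and body illustrative, not from this PR):

```julia
using KernelAbstractions

@kernel inbounds = true unsafe_indices = true function axpy!(Y, a, X)
    N = @uniform prod(@groupsize())
    gI = @index(Group, Linear)
    i = @index(Local, Linear)
    idx = (gI - 1) * N + i
    if idx <= length(Y)
        Y[idx] = muladd(a, X[idx], Y[idx])  # inbounds=true wraps the body in @inbounds
    end
end
```

Adding `cpu = false` would additionally skip generation of the CPU variant, restricting the kernel to GPU backends.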
30 changes: 15 additions & 15 deletions src/macros.jl
@@ -10,7 +10,7 @@ function find_return(stmt)
end

# XXX: Proper errors
-function __kernel(expr, generate_cpu = true, force_inbounds = false, unsafe_indicies = false)
+function __kernel(expr, generate_cpu = true, force_inbounds = false, unsafe_indices = false)
def = splitdef(expr)
name = def[:name]
args = def[:args]
@@ -46,7 +46,7 @@ function __kernel(expr, generate_cpu = true, force_inbounds = false, unsafe_indi

def_gpu = deepcopy(def)
def_gpu[:name] = gpu_name = Symbol(:gpu_, name)
-transform_gpu!(def_gpu, constargs, force_inbounds, unsafe_indicies)
+transform_gpu!(def_gpu, constargs, force_inbounds, unsafe_indices)
gpu_function = combinedef(def_gpu)

# create constructor functions
@@ -78,7 +78,7 @@ end

# The easy case, transform the function for GPU execution
# - mark constant arguments by applying `constify`.
-function transform_gpu!(def, constargs, force_inbounds, unsafe_indicies)
+function transform_gpu!(def, constargs, force_inbounds, unsafe_indices)
let_constargs = Expr[]
for (i, arg) in enumerate(def[:args])
if constargs[i]
@@ -89,13 +89,13 @@ function transform_gpu!(def, constargs, force_inbounds, unsafe_indicies)
new_stmts = Expr[]
body = MacroTools.flatten(def[:body])
push!(new_stmts, Expr(:aliasscope))
-if !unsafe_indicies
+if !unsafe_indices
push!(new_stmts, :(__active_lane__ = $__validindex(__ctx__)))
end
if force_inbounds
push!(new_stmts, Expr(:inbounds, true))
end
-if !unsafe_indicies
+if !unsafe_indices
append!(new_stmts, split(emit_gpu, body.args))
else
push!(new_stmts, body)
@@ -117,7 +117,7 @@ end
# - mark constant arguments by applying `constify`.
# - insert aliasscope markers
# - insert implied loop bodies
-# - handle indicies
+# - handle indices
# - hoist workgroup definitions
# - hoist uniform variables
function transform_cpu!(def, constargs, force_inbounds)
@@ -149,7 +149,7 @@ function transform_cpu!(def, constargs, force_inbounds)
end

struct WorkgroupLoop
-indicies::Vector{Any}
+indices::Vector{Any}
stmts::Vector{Any}
allocations::Vector{Any}
private_allocations::Vector{Any}
@@ -177,7 +177,7 @@ end
function split(
emit,
stmts,
-indicies = Any[], private = Set{Symbol}(),
+indices = Any[], private = Set{Symbol}(),
)
# 1. Split the code into blocks separated by `@synchronize`
# 2. Aggregate `@index` expressions
@@ -191,7 +191,7 @@ function split(
for stmt in stmts
has_sync = find_sync(stmt)
if has_sync
-loop = WorkgroupLoop(deepcopy(indicies), current, allocations, private_allocations, deepcopy(private), is_sync(stmt))
+loop = WorkgroupLoop(deepcopy(indices), current, allocations, private_allocations, deepcopy(private), is_sync(stmt))
push!(new_stmts, emit(loop))
allocations = Any[]
private_allocations = Any[]
@@ -206,7 +206,7 @@ function split(
function recurse(expr::Expr)
expr = unblock(expr)
if is_scope_construct(expr) && any(find_sync, expr.args)
-new_args = unblock(split(emit, expr.args, deepcopy(indicies), deepcopy(private)))
+new_args = unblock(split(emit, expr.args, deepcopy(indices), deepcopy(private)))
return Expr(expr.head, new_args...)
else
return Expr(expr.head, map(recurse, expr.args)...)
@@ -225,7 +225,7 @@ function split(
continue
elseif @capture(stmt, lhs_ = rhs_ | (vs__, lhs_ = rhs_))
if @capture(rhs, @index(args__))
-push!(indicies, stmt)
+push!(indices, stmt)
continue
elseif @capture(rhs, @localmem(args__) | @uniform(args__))
push!(allocations, stmt)
@@ -249,15 +249,15 @@

# everything since the last `@synchronize`
if !isempty(current)
-loop = WorkgroupLoop(deepcopy(indicies), current, allocations, private_allocations, deepcopy(private), false)
+loop = WorkgroupLoop(deepcopy(indices), current, allocations, private_allocations, deepcopy(private), false)
push!(new_stmts, emit(loop))
end
return new_stmts
end

function emit_cpu(loop)
idx = gensym(:I)
-for stmt in loop.indicies
+for stmt in loop.indices
# splice index into the i = @index(Cartesian, $idx)
@assert stmt.head === :(=)
rhs = stmt.args[2]
@@ -300,7 +300,7 @@ function emit_cpu(loop)
loopexpr = quote
for $idx in $__workitems_iterspace(__ctx__)
$__validindex(__ctx__, $idx) || continue
-$(loop.indicies...)
+$(loop.indices...)
$(unblock(body))
end
end
@@ -318,7 +318,7 @@ function emit_gpu(loop)
$(loop.allocations...)
$(loop.private_allocations...)
if __active_lane__
-$(loop.indicies...)
+$(loop.indices...)
$(unblock(body))
end
end
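To see what `split` and the emitters deal with, consider a hedged example (adapted from the localmem test pattern, not part of this PR). Statements before the barrier form one `WorkgroupLoop`, statements after it form another, and the collected `@index` assignments (`i`, `I`) are re-spliced into each emitted loop so they remain valid across `@synchronize`:

```julia
using KernelAbstractions

# Reverse each workgroup's slice of A in place via shared memory.
@kernel function reverse_wg!(A)
    N = @uniform prod(@groupsize())
    I = @index(Global, Linear)
    i = @index(Local, Linear)
    tmp = @localmem eltype(A) (N,)  # ok iff the groupsize is static
    @inbounds tmp[i] = A[I]
    @synchronize                    # on the CPU, the body is split into two workitem loops here
    @inbounds A[I] = tmp[N - i + 1]
end
```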
4 changes: 2 additions & 2 deletions test/localmem.jl
@@ -34,7 +34,7 @@ end
end
end

-@kernel unsafe_indicies = true function localmem_unsafe_indicies(A)
+@kernel unsafe_indices = true function localmem_unsafe_indices(A)
N = @uniform prod(@groupsize())
gI = @index(Group, Linear)
i = @index(Local, Linear)
@@ -49,7 +49,7 @@

function localmem_testsuite(backend, ArrayT)
@testset "kernels" begin
-@testset for kernel! in (localmem(backend(), 16), localmem2(backend(), 16), localmem_unsafe_indicies(backend(), 16))
+@testset for kernel! in (localmem(backend(), 16), localmem2(backend(), 16), localmem_unsafe_indices(backend(), 16))
A = ArrayT{Int}(undef, 64)
kernel!(A, ndrange = size(A))
synchronize(backend())
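The shared testsuite is invoked with a backend type and an array type; for example, assuming the CPU backend:

```julia
using KernelAbstractions
localmem_testsuite(CPU, Array)
```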