Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "AcceleratedKernels"
uuid = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
authors = ["Andrei-Leonard Nicusan <[email protected]> and contributors"]
version = "0.4.1"
version = "0.4.2"

[deps]
ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"
Expand Down
4 changes: 2 additions & 2 deletions src/accumulate/accumulate_nd.jl
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ end
# We have a block of threads to accumulate along the dims axis; do it in chunks of
# block_size and keep track of previous chunks' running prefix
ichunk = typeof(iblock)(0)
num_chunks = (length_dims + block_size - 0x1) ÷ block_size
num_chunks = (length_dims + (0x2 * block_size) - 0x1) ÷ (0x2 * block_size)
total = neutral

if ithread == 0x0
Expand Down Expand Up @@ -326,7 +326,7 @@ end

# ...and accumulate the last value too
if bi == 0x2 * block_size - 0x1
if iblock < num_chunks - 0x1
if ichunk < num_chunks - 0x1
temp[bi + bank_offset_b + 0x1] = op(t2, v[
input_base_idx +
((ichunk + 0x1) * block_size * 0x2 - 0x1) * vstrides[dims] +
Expand Down
11 changes: 11 additions & 0 deletions test/accumulate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,17 @@ end
# Test that undefined kwargs are not accepted
@test_throws MethodError AK.accumulate(+, v; init=10, dims=2, inclusive=false, bad=:kwarg)

# Test all options with bigger matrices
for D in [(1_000_000,3), (3,1_000_000)], dims in [1,2]
@testset let D = D, dims = dims
vh = ones(Float32, D)
v = array_from_host(vh)
s = AK.accumulate(+, v; init=0, dims)
sh = Array(s)
@test sh == accumulate(+, vh; init=0, dims)
end
end

# Testing different settings
AK.accumulate(
(x, y) -> x + 1,
Expand Down
Loading