diff --git a/Project.toml b/Project.toml
index ea21d52..b51bb27 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "AcceleratedKernels"
 uuid = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
 authors = ["Andrei-Leonard Nicusan <leonard@evophase.co.uk> and contributors"]
-version = "0.3.0"
+version = "0.3.1"
 
 [deps]
 ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"
diff --git a/src/accumulate/accumulate.jl b/src/accumulate/accumulate.jl
index 15f47d3..e88fd18 100644
--- a/src/accumulate/accumulate.jl
+++ b/src/accumulate/accumulate.jl
@@ -71,11 +71,11 @@ For compatibility with the `Base.accumulate!` function, we provide the two-array
 we do not need the constraint of `dst` and `src` being different; to minimise memory use, we
 recommend using the single-array interface (the first one above).
 
-## CPU 
+## CPU
 The CPU implementation is currently single-threaded; we are waiting on a multithreaded
 implementation in OhMyThreads.jl ([issue](https://github.com/JuliaFolds2/OhMyThreads.jl/issues/129)).
 
-## GPU 
+## GPU
 For the 1D case (`dims=nothing`), the `alg` can be one of the following:
 - `DecoupledLookback()`: the default algorithm, using opportunistic lookback to reuse earlier
   blocks' results; requires device-level memory consistency guarantees, which Apple Metal does not
@@ -241,7 +241,7 @@ function accumulate(
     temp::Union{Nothing, AbstractArray}=nothing,
     temp_flags::Union{Nothing, AbstractArray}=nothing,
 )
-    dst_type = promote_type(eltype(v), typeof(init))
+    dst_type = Base.promote_op(op, eltype(v), typeof(init))
     vcopy = similar(v, dst_type)
     copyto!(vcopy, v)
     accumulate!(
@@ -252,7 +252,7 @@ function accumulate(
         inclusive=inclusive,
 
         alg=alg,
-        
+
         block_size=block_size,
         temp=temp,
         temp_flags=temp_flags,