Skip to content

Commit bcf544e

Browse files
authored
Updates for CUDA v4, KernelAbstractions v0.9 (#177)
* git ignore Manifest.toml
* add CUDA v4 test to CI
* rm CUDAKernels.jl
* add Pkg to test, rm CUDAKernels
* test Tullio with CUDA.jl v4
* CUDADevice -> CUDABackend, remove Events/waits
* kernelabstractions 0.9 compat
* rm debugging file
* properly scope CUDABackend
* print Pkg.status()
* Update runtests.jl uncomment tests
* Update Project.toml
* rm cuda 3
* tensoroperations v4
* fix buildkite julia versions
* fix buildkite pipeline julia version
* clean up tests
* clean up ci
1 parent cff1332 commit bcf544e

File tree

9 files changed

+19
-73
lines changed

9 files changed

+19
-73
lines changed

.buildkite/pipeline.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ env:
33
# SECRET_CODECOV_TOKEN: "..."
44

55
steps:
6-
- label: "Julia 1.6"
6+
- label: "Julia 1.8"
77
plugins:
88
- JuliaCI/julia#v0.5:
9-
version: 1.6
9+
version: "1.8"
1010
- JuliaCI/julia-test#v0.3: ~
1111
# - JuliaCI/julia-coverage#v0.3:
1212
# codecov: true
@@ -16,10 +16,10 @@ steps:
1616
if: build.message !~ /\[skip tests\]/
1717
timeout_in_minutes: 60
1818

19-
- label: "Julia 1.8"
19+
- label: "Julia 1.10"
2020
plugins:
2121
- JuliaCI/julia#v0.5:
22-
version: 1.8
22+
version: "1.10"
2323
- JuliaCI/julia-test#v0.3: ~
2424
# - JuliaCI/julia-coverage#v0.3:
2525
# codecov: true

.github/workflows/ci-julia-nightly.yml

Lines changed: 0 additions & 54 deletions
This file was deleted.

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ jobs:
2828
version:
2929
- '1.6'
3030
- '1' # automatically expands to the latest stable 1.x release of Julia
31+
- 'nightly'
3132
steps:
3233
- uses: actions/checkout@v2
3334
- uses: julia-actions/setup-julia@v1

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Manifest.toml

Project.toml

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,34 +22,33 @@ TullioTrackerExt = "Tracker"
2222
TullioChainRulesCoreExt = "ChainRulesCore"
2323

2424
[compat]
25-
CUDA = "3.6, 4"
26-
CUDAKernels = "0.3.3, 0.4"
25+
CUDA = "4, 5"
2726
ChainRulesCore = "1"
2827
DiffRules = "1"
2928
FillArrays = "0.11, 0.12, 0.13"
3029
ForwardDiff = "0.10"
31-
KernelAbstractions = "0.7.2, 0.8"
30+
KernelAbstractions = "0.7.2, 0.8, 0.9"
3231
LoopVectorization = "0.12.101"
3332
NamedDims = "0.2"
3433
OffsetArrays = "1"
3534
Requires = "1"
36-
TensorOperations = "3"
35+
TensorOperations = "4"
3736
Tracker = "0.2"
3837
VectorizationBase = "0.21.23"
3938
Zygote = "0.6.33"
40-
julia = "1.6"
39+
julia = "1.8"
4140

4241
[extras]
4342
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
4443
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
45-
CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
4644
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
4745
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
4846
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
4947
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
5048
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
5149
NamedDims = "356022a1-0364-5f58-8944-0da4b18d706f"
5250
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
51+
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
5352
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
5453
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
5554
TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2"
@@ -59,4 +58,4 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
5958
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
6059

6160
[targets]
62-
test = ["Test", "CUDA", "CUDAKernels", "FillArrays", "ForwardDiff", "KernelAbstractions", "LinearAlgebra", "LoopVectorization", "NamedDims", "OffsetArrays", "Printf", "Random", "TensorOperations", "Tracker", "VectorizationBase", "Zygote"]
61+
test = ["Test", "CUDA", "FillArrays", "ForwardDiff", "KernelAbstractions", "LinearAlgebra", "LoopVectorization", "NamedDims", "OffsetArrays", "Pkg", "Printf", "Random", "TensorOperations", "Tracker", "VectorizationBase", "Zygote"]

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ But it also co-operates with various other packages, provided they are loaded be
2727

2828
* It uses [`LoopVectorization.@avx`](https://github.com/chriselrod/LoopVectorization.jl) to speed many things up. (Disable with keyword `avx=false`.) On a good day this will match the speed of OpenBLAS for matrix multiplication.
2929

30-
* It uses [`KernelAbstractions.@kernel`](https://github.com/JuliaGPU/KernelAbstractions.jl) (plus CUDAKernels) to make a GPU version. (Disable with `cuda=false`.) This is somewhat experimental, and may not be fast.
30+
* It uses [`KernelAbstractions.@kernel`](https://github.com/JuliaGPU/KernelAbstractions.jl) to make a GPU version. (Disable with `cuda=false`.) This is somewhat experimental, and may not be fast.
3131

3232
The macro also tries to provide a gradient for use with [Tracker](https://github.com/FluxML/Tracker.jl) or (via [ChainRules](https://github.com/JuliaDiff/ChainRules.jl)) for [Zygote](https://github.com/FluxML/Zygote.jl), [Yota](https://github.com/dfdx/Yota.jl), etc. <!-- or [ReverseDiff](https://github.com/JuliaDiff/ReverseDiff.jl). -->
3333
(Disable with `grad=false`, or `nograd=A`.) This is done in one of two ways:
@@ -237,7 +237,7 @@ using Tracker # or Zygote
237237
ΔA = Tracker.gradient((A,B) -> sum(mul(A, B)), A, B)[1]
238238
ΔA ≈ ones(3,500) * B' # true
239239

240-
using CUDA, CUDAKernels, KernelAbstractions # Now defined with a GPU version:
240+
using CUDA, KernelAbstractions # Now defined with a GPU version:
241241
mul(A, B) = @tullio C[i,k] := A[i,j] * B[j,k]
242242

243243
cu(A * B) ≈ mul(cu(A), cu(B)) # true

src/macro.jl

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,16 +1161,15 @@ function make_many_actors(act!, args, ex1, outer::Vector, ex3, inner::Vector, ex
11611161
end
11621162
store.verbose==2 && @info "=====KA===== KernelAbstractions kernel $note" verbosetidy(kex1)
11631163
push!(store.outpre, macroexpand(store.mod, kex1))
1164-
if isdefined(store.mod, :CUDA) && isdefined(store.mod, :CuArray) # new-style, CUDA.jl, with CUDADevice()
1164+
if isdefined(store.mod, :CUDA) && isdefined(store.mod, :CuArray) # new-style, CUDA.jl, with CUDA.CUDABackend()
11651165
info2 = store.verbose>0 ? :(@info "running KernelAbstractions + CUDA actor $($note)" maxlog=3 _id=$(hash(store))) : nothing
11661166
kex2 = quote
11671167

11681168
local @inline function $act!(::Type{<:CuArray}, $(args...), $KEEP=nothing, $FINAL=true) where {$TYP}
11691169
$info2
1170-
cu_kern! = $kernel(CUDADevice())
1170+
cu_kern! = $kernel(CUDA.CUDABackend())
11711171
$(asserts...)
1172-
$ACC = cu_kern!($(args...), $KEEP, $FINAL; ndrange=tuple($(sizes...)), workgroupsize=$workgroupsize, dependencies=Event(CUDADevice()))
1173-
KernelAbstractions.wait(CUDADevice(), $ACC)
1172+
$ACC = cu_kern!($(args...), $KEEP, $FINAL; ndrange=tuple($(sizes...)), workgroupsize=$workgroupsize)
11741173
end
11751174

11761175
end
@@ -1185,7 +1184,6 @@ function make_many_actors(act!, args, ex1, outer::Vector, ex3, inner::Vector, ex
11851184
cpu_kern! = $kernel(CPU(), 4)
11861185
$(asserts...)
11871186
$ACC = cpu_kern!($(args...), $KEEP, $FINAL; ndrange=tuple($(sizes...)))
1188-
KernelAbstractions.wait($ACC)
11891187
end
11901188

11911189
end

test/cuda.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22
using Tullio, Test
3-
using CUDA, CUDAKernels, KernelAbstractions
3+
using CUDA, KernelAbstractions
44
CUDA.allowscalar(false)
55
using Tracker, ForwardDiff
66
@tullio grad=Base

test/runtests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using Test, Printf
2+
import Pkg
23

34
t1 = @elapsed using Tullio
45
@info @sprintf("Loading Tullio took %.1f seconds", t1)

0 commit comments

Comments
 (0)