Skip to content

Commit 4b87fde

Browse files
Merge branch 'task-affinity' of https://github.com/AkhilAkkapelli/Dagger.jl into task-affinity
2 parents 6ca66cd + 7718848 commit 4b87fde

39 files changed

+4880
-495
lines changed

.buildkite/pipeline.yml

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
arch: x86_64
88
command: "julia --project -e 'using Pkg; Pkg.develop(;path=\"lib/TimespanLogging\")'"
99

10+
.gputest: &gputest
11+
if: build.message !~ /\[skip tests\]/
12+
1013
.bench: &bench
1114
if: build.message =~ /\[run benchmarks\]/
1215
agents:
@@ -28,6 +31,28 @@ steps:
2831
- JuliaCI/julia-coverage#v1:
2932
codecov: true
3033

34+
- label: Julia 1.10
35+
timeout_in_minutes: 90
36+
<<: *test
37+
plugins:
38+
- JuliaCI/julia#v1:
39+
version: "1.10"
40+
- JuliaCI/julia-test#v1:
41+
julia_args: "--threads=1"
42+
- JuliaCI/julia-coverage#v1:
43+
codecov: true
44+
45+
- label: Julia 1.11
46+
timeout_in_minutes: 90
47+
<<: *test
48+
plugins:
49+
- JuliaCI/julia#v1:
50+
version: "1.11"
51+
- JuliaCI/julia-test#v1:
52+
julia_args: "--threads=1"
53+
- JuliaCI/julia-coverage#v1:
54+
codecov: true
55+
3156
- label: Julia 1
3257
timeout_in_minutes: 90
3358
<<: *test
@@ -65,6 +90,83 @@ steps:
6590
- JuliaCI/julia-coverage#v1:
6691
codecov: true
6792

93+
- label: Julia 1.11 (CUDA)
94+
timeout_in_minutes: 20
95+
<<: *gputest
96+
plugins:
97+
- JuliaCI/julia#v1:
98+
version: "1.11"
99+
- JuliaCI/julia-test#v1: ~
100+
- JuliaCI/julia-coverage#v1:
101+
codecov: true
102+
agents:
103+
queue: "juliagpu"
104+
cuda: "*"
105+
env:
106+
CI_USE_CUDA: "1"
107+
108+
- label: Julia 1.11 (ROCm)
109+
timeout_in_minutes: 20
110+
<<: *gputest
111+
plugins:
112+
- JuliaCI/julia#v1:
113+
version: "1.11"
114+
- JuliaCI/julia-test#v1: ~
115+
- JuliaCI/julia-coverage#v1:
116+
codecov: true
117+
agents:
118+
queue: "juliagpu"
119+
rocm: "*"
120+
env:
121+
CI_USE_ROCM: "1"
122+
123+
- label: Julia 1.11 (oneAPI)
124+
timeout_in_minutes: 20
125+
<<: *gputest
126+
plugins:
127+
- JuliaCI/julia#v1:
128+
version: "1.11"
129+
- JuliaCI/julia-test#v1: ~
130+
- JuliaCI/julia-coverage#v1:
131+
codecov: true
132+
agents:
133+
queue: "juliagpu"
134+
intel: "*"
135+
env:
136+
CI_USE_ONEAPI: "1"
137+
138+
- label: Julia 1.11 (Metal)
139+
timeout_in_minutes: 20
140+
<<: *gputest
141+
plugins:
142+
- JuliaCI/julia#v1:
143+
version: "1.11"
144+
- JuliaCI/julia-test#v1: ~
145+
- JuliaCI/julia-coverage#v1:
146+
codecov: true
147+
agents:
148+
queue: "juliaecosystem"
149+
os: "macos"
150+
arch: "aarch64"
151+
env:
152+
CI_USE_METAL: "1"
153+
154+
- label: Julia 1.11 (OpenCL)
155+
timeout_in_minutes: 20
156+
<<: *gputest
157+
plugins:
158+
- JuliaCI/julia#v1:
159+
version: "1.11"
160+
- JuliaCI/julia-test#v1:
161+
- JuliaCI/julia-coverage#v1:
162+
codecov: true
163+
agents:
164+
queue: "juliaecosystem"
165+
os: macos
166+
arch: aarch64
167+
env:
168+
CI_USE_OPENCL: "1"
169+
68170
- label: Julia 1 - TimespanLogging
69171
timeout_in_minutes: 20
70172
<<: *test

Project.toml

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
name = "Dagger"
22
uuid = "d58978e5-989f-55fb-8d15-ea34adc7bf54"
3-
version = "0.18.16"
3+
version = "0.18.17"
44

55
[deps]
66
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
77
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
88
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
99
DistributedNext = "fab6aee4-877b-4bac-a744-3eca44acbb6f"
10+
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
1011
Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
12+
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
1113
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1214
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
1315
MemPool = "f9f48841-c794-520a-933b-121f7ba6ed94"
@@ -28,35 +30,52 @@ TimespanLogging = "a526e669-04d3-4846-9525-c66122c55f63"
2830
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
2931

3032
[weakdeps]
33+
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
34+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
3135
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
3236
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
3337
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
3438
GraphViz = "f526b714-d49f-11e8-06ff-31ed36ee7ee0"
3539
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
40+
Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
41+
OpenCL = "08131aa3-fb12-5dee-8b74-c09406e224a2"
42+
oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
3643
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
3744
PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
3845

3946
[extensions]
47+
CUDAExt = "CUDA"
4048
DistributionsExt = "Distributions"
4149
GraphVizExt = "GraphViz"
4250
GraphVizSimpleExt = "Colors"
51+
IntelExt = "oneAPI"
4352
JSON3Ext = "JSON3"
53+
MetalExt = "Metal"
54+
OpenCLExt = "OpenCL"
4455
PlotsExt = ["DataFrames", "Plots"]
4556
PythonExt = "PythonCall"
57+
ROCExt = "AMDGPU"
4658

4759
[compat]
48-
Adapt = "4.0.4"
60+
AMDGPU = "1"
61+
Adapt = "4"
62+
CUDA = "3, 4, 5"
4963
Colors = "0.12, 0.13"
5064
DataFrames = "1"
5165
DataStructures = "0.18"
5266
DistributedNext = "1.0.0"
5367
Distributions = "0.25"
68+
FillArrays = "1.13.0"
5469
GraphViz = "0.2"
5570
Graphs = "1"
5671
JSON3 = "1"
72+
KernelAbstractions = "0.9"
5773
MacroTools = "0.5"
5874
MemPool = "0.4.12"
75+
Metal = "1.1"
5976
OnlineStats = "1"
77+
OpenCL = "0.10"
78+
oneAPI = "1, 2"
6079
Plots = "1"
6180
PrecompileTools = "1.2"
6281
Preferences = "1.4.3"

docs/make.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,11 @@ makedocs(;
2727
"Scopes" => "scopes.md",
2828
"Processors" => "processors.md",
2929
"Task Queues" => "task-queues.md",
30-
"Datadeps" => "datadeps.md",
30+
"Datadeps" => [
31+
"Basics" => "datadeps.md",
32+
"Stencils" => "stencils.md",
33+
],
34+
"GPUs" => "gpu.md",
3135
"Option Propagation" => "propagation.md",
3236
"Logging and Visualization" => [
3337
"Logging: Basics" => "logging.md",

docs/src/data-management.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,31 @@ VRAM, etc.) the value resides, and where the value is allowed to be transferred
2626
and dereferenced. See [Processors](@ref) and [Scopes](@ref) for more details on
2727
how these properties can be used to control scheduling behavior around `Chunk`s.
2828

29+
## Data movement rules
30+
31+
Dagger utilizes a 3-argument function `Dagger.move(from_proc::Dagger.Processor, to_proc::Dagger.Processor, x)` to manage data movement between processors. This function is invoked by the scheduler for every argument of a task, including the task's function itself, before the task is executed. The purpose of `move` is to transfer the argument `x` from its current processor (`from_proc`) to the target processor (`to_proc`) where the task will run, and to perform any necessary data conversion or unwrapping before execution.
32+
33+
This `move` mechanism is fundamental to how Dagger handles `Chunk` objects. When a `Chunk` is passed as an argument to a task, the `move` function is responsible for unwrapping the `Chunk` and providing its underlying value to the task.
34+
35+
While users can define custom `move` implementations for their specific data types if needed, the default fallback implementation of `move` is designed to handle most common use cases effectively. Therefore, custom implementations are generally unnecessary.
36+
37+
Here's an example of a custom `move` implementation:
38+
39+
```julia
40+
struct MyCustomType
41+
data::Vector{Float64}
42+
end
43+
44+
# Custom move function for MyCustomType
45+
function Dagger.move(from_proc::Dagger.Processor, to_proc::Dagger.Processor, x::MyCustomType)
46+
return x.data
47+
end
48+
49+
A = MyCustomType(rand(100))
50+
s = fetch(Dagger.@spawn sum(A))
51+
@assert s == sum(A.data)
52+
```
53+
2954
## Mutation
3055

3156
Normally, Dagger tasks should be functional and "pure": never mutating their

docs/src/datadeps.md

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -179,9 +179,44 @@ Dagger.spawn_datadeps() do
179179
end
180180
```
181181

182-
You can pass any number of aliasing modifiers to `Deps`. This is particularly
183-
useful for declaring aliasing with `Diagonal`, `Bidiagonal`, `Tridiagonal`, and
184-
`SymTridiagonal` access, as these "wrappers" make a copy of their parent array
185-
and thus can't be used to "mask" access to the parent like `UpperTriangular`
186-
and `UnitLowerTriangular` can (which is valuable for writing memory-efficient,
187-
generic algorithms in Julia).
182+
We call `InOut(Diagonal)` an "aliasing modifier". The purpose of `Deps` is to
183+
pass an argument (here, `A`) as-is, while specifying to Datadeps what portions
184+
of the argument will be accessed (in this case, the diagonal elements) and how
185+
(read/write/both). You can pass any number of aliasing modifiers to `Deps`.
186+
187+
`Deps` is particularly useful for declaring aliasing with `Diagonal`,
188+
`Bidiagonal`, `Tridiagonal`, and `SymTridiagonal` access, as these "wrappers"
189+
make a copy of their parent array and thus can't be used to "mask" access to the
190+
parent like `UpperTriangular` and `UnitLowerTriangular` can (which is valuable
191+
for writing memory-efficient, generic algorithms in Julia).
192+
193+
### Supported Aliasing Modifiers
194+
195+
- Any function that returns the original object or a view of the original object
196+
- `UpperTriangular`/`LowerTriangular`/`UnitUpperTriangular`/`UnitLowerTriangular`
197+
- `Diagonal`/`Bidiagonal`/`Tridiagonal`/`SymTridiagonal` (via `Deps`, e.g. to read from the diagonal of `X`: `Dagger.@spawn sum(Deps(X, In(Diagonal)))`)
198+
- `Symbol` for field access (via `Deps`, e.g. to write to `X.value`: `Dagger.@spawn setproperty!(Deps(X, InOut(:value)), :value, 42)`)
199+
200+
## In-place data movement rules
201+
202+
Datadeps uses a specialized 5-argument function, `Dagger.move!(dep_mod, from_space::Dagger.MemorySpace, to_space::Dagger.MemorySpace, from, to)`, for managing in-place data movement. This function is an in-place variant of the more general `move` function (see [Data movement rules](@ref)) and is exclusively used within the Datadeps system. The `dep_mod` argument is usually just `identity`, but it can also be an aliasing modifier such as `UpperTriangular`, which restricts which portion of the data is read from `from` and written to `to`.
203+
204+
The core responsibility of `move!` is to read data from the `from` argument and write it directly into the `to` argument. This is crucial for operations that modify data in place, as often encountered in numerical computing and linear algebra.
205+
206+
The default implementation of `move!` handles `Chunk` objects by unwrapping them and then recursively calling `move!` on the underlying values. This ensures that the in-place operation is performed on the actual data.
207+
208+
Users have the option to define their own `move!` implementations for custom data types. However, this is typically not necessary for types that are subtypes of `AbstractArray`, provided that these types support the standard `Base.copyto!(to, from)` function. The default `move!` will leverage `copyto!` for such array types, enabling efficient in-place updates.
209+
210+
Here's an example of a custom `move!` implementation:
211+
212+
```julia
213+
struct MyCustomArrayWrapper{T,N}
214+
data::Array{T,N}
215+
end
216+
217+
# Custom move! function for MyCustomArrayWrapper
218+
function Dagger.move!(dep_mod::Any, from_space::Dagger.MemorySpace, to_space::Dagger.MemorySpace, from::MyCustomArrayWrapper, to::MyCustomArrayWrapper)
219+
copyto!(dep_mod(to.data), dep_mod(from.data))
220+
return
221+
end
222+
```

0 commit comments

Comments
 (0)