Skip to content

Commit bf6f373

Browse files
Enhance documentation
1 parent 7463360 commit bf6f373

File tree

2 files changed

+293
-123
lines changed

2 files changed

+293
-123
lines changed

src/simulate/magnetization.jl

Lines changed: 128 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,57 @@
1+
#=========================================================================================
2+
# Magnetization Simulation
3+
#
4+
# This file implements the core magnetization simulation functionality for BlochSimulators.jl
5+
#
6+
# ## High-Level Workflow
7+
#
8+
# 1. User calls `simulate_magnetization(sequence, parameters)`
9+
# 2. The function automatically detects the computational resource based on input types:
10+
# - Single voxel → CPU1 (single-threaded)
11+
# - CPU array → CPUThreads (multi-threaded, default)
12+
# - CuArray → CUDALibs (GPU)
13+
# - DArray → CPUProcesses (distributed across workers)
14+
# 3. Memory is allocated for the output magnetization array
15+
# 4. The simulation is dispatched to the appropriate parallel execution strategy
16+
# 5. Results are returned as an array: (output_size(sequence)..., num_voxels)
17+
#
18+
# ## Architecture Overview
19+
#
20+
# The simulation uses multiple dispatch on `AbstractResource` types to select the execution
21+
# strategy:
22+
# - `CPU1()`: Serial execution, one voxel at a time
23+
# - `CPUThreads()`: Parallel execution using Julia threads (`Threads.@threads`)
24+
# - `CPUProcesses()`: Distributed execution across workers using `DArray`
25+
# - `CUDALibs()`: GPU execution with CUDA kernels (WARPSIZE threads per voxel)
26+
#
27+
# Each strategy calls the sequence-specific `simulate_magnetization!` method that
28+
# numerically integrates the Bloch equations for the given sequence and tissue properties
29+
# using either the isochromat or the EPG formalism.
30+
#
31+
# ## For Users
32+
#
33+
# Most users should call the convenience function without specifying a resource:
34+
# `magnetization = simulate_magnetization(sequence, parameters)`
35+
# Or, in case an NVIDIA GPU is available, one would typically do:
36+
# `magnetization = simulate_magnetization(gpu(f32(sequence,)) gpu(f32(parameters)))`
37+
#
38+
# The function will automatically select the best computational resource based on your input.
39+
#
40+
# ## For Developers
41+
#
42+
# When implementing a new sequence type (subtype of `BlochSimulator`), you must define:
43+
# 1. `simulate_magnetization!(output, sequence, state, tissue_properties)` - Core simulation
44+
# 2. `initialize_states(resource, sequence)` - Initialize state vectors
45+
# 3. `output_size(sequence)` - Dimensions of output per voxel
46+
# 4. `output_eltype(sequence)` - Element type of output (typically ComplexF32 or ComplexF64)
47+
#
48+
# See the Developer Guide in README.md for more details.
49+
#=========================================================================================#
50+
51+
#=========================================================================================
52+
# PUBLIC API
53+
=========================================================================================#
54+
155
"""
256
simulate_magnetization(resource, sequence, parameters)
357
@@ -47,6 +101,42 @@ function simulate_magnetization(
47101
return magnetization
48102
end
49103
104+
#=========================================================================================
105+
# CONVENIENCE METHODS - Automatic Resource Selection
106+
=========================================================================================#
107+
108+
"""
109+
simulate_magnetization(sequence, parameters)
110+
111+
Convenience function to simulate magnetization without specifying the computational
112+
resource. The function automatically selects the appropriate resource based on the type of
113+
the `sequence` and `parameters`. The fallback case is to use multi-threaded CPU
114+
computations.
115+
116+
# Automatic Resource Selection Rules
117+
- Single `AbstractTissueProperties` → `CPU1()` (single-threaded)
118+
- `StructArray` on CPU → `CPUThreads()` (multi-threaded, default)
119+
- `CuArray` → `CUDALibs()` (GPU)
120+
- `DArray` → `CPUProcesses()` (distributed across workers)
121+
"""
122+
function simulate_magnetization(sequence::BlochSimulator, parameters::StructArray)
123+
124+
sequence_on_gpu = _all_arrays_are_cuarrays(sequence)
125+
parameters_on_gpu = _all_arrays_are_cuarrays(parameters)
126+
127+
if xor(sequence_on_gpu, parameters_on_gpu)
128+
throw(ArgumentError("Both sequence and parameters must be on the GPU or not on the GPU"))
129+
end
130+
131+
if sequence_on_gpu && parameters_on_gpu
132+
return simulate_magnetization(CUDALibs(), sequence, parameters)
133+
elseif parameters isa DArray
134+
return simulate_magnetization(CPUProcesses(), sequence, parameters)
135+
else
136+
return simulate_magnetization(CPUThreads(), sequence, parameters)
137+
end
138+
end
139+
50140
"""
51141
If no `resource` is provided, the simulation is performed on the CPU in a multi-threaded
52142
fashion by default.
@@ -80,31 +170,13 @@ function simulate_magnetization(sequence, tissue_properties::AbstractTissuePrope
80170
simulate_magnetization(CPU1(), sequence, parameters)
81171
end
82172
83-
"""
84-
function simulate_magnetization(sequence, parameters)
85-
86-
Convenience function to simulate magnetization without specifying the computational
87-
resource. The function automatically selects the appropriate resource based on the type of
88-
the `sequence` and `parameters`. The fallback case is to use multi-threaded CPU
89-
computations.
90-
"""
91-
function simulate_magnetization(sequence::BlochSimulator, parameters::StructArray)
92-
93-
sequence_on_gpu = _all_arrays_are_cuarrays(sequence)
94-
parameters_on_gpu = _all_arrays_are_cuarrays(parameters)
95-
96-
if xor(sequence_on_gpu, parameters_on_gpu)
97-
throw(ArgumentError("Both sequence and parameters must be on the GPU or not on the GPU"))
98-
end
99-
100-
if sequence_on_gpu && parameters_on_gpu
101-
return simulate_magnetization(CUDALibs(), sequence, parameters)
102-
elseif parameters isa DArray
103-
return simulate_magnetization(CPUProcesses(), sequence, parameters)
104-
else
105-
return simulate_magnetization(CPUThreads(), sequence, parameters)
106-
end
107-
end
173+
#=========================================================================================
174+
# RESOURCE-SPECIFIC IMPLEMENTATIONS
175+
#
176+
# These methods implement the parallel execution strategy for each computational resource.
177+
# They are called by the public API after memory allocation and dispatch based on the
178+
# `AbstractResource` type.
179+
=========================================================================================#
108180
109181
"""
110182
simulate_magnetization!(magnetization, resource, sequence, parameters)
@@ -113,55 +185,62 @@ Simulate the magnetization response for all combinations of tissue properties co
113185
`parameters` and stores the results in the pre-allocated `magnetization` array. The actual
114186
implementation depends on the computational resource specified in `resource`.
115187
116-
This function is called by `simulate_magnetization` and is not intended considered part of
117-
te public API.
188+
This function is called by `simulate_magnetization` and is not considered part of the
189+
public API.
118190
"""
119191
function simulate_magnetization!(magnetization, ::CPU1, sequence, parameters)
120-
vd = length(size(magnetization))
192+
# Serial execution: Loop over voxels one at a time
193+
vd = length(size(magnetization)) # Get voxel dimension (last dimension)
121194
for voxel ∈ eachindex(parameters)
195+
# Initialize state vector for this voxel
122196
state = initialize_states(CPU1(), sequence)
197+
# Simulate and store result in the voxel's slice of the output array
123198
simulate_magnetization!(selectdim(magnetization, vd, voxel), sequence, state, parameters[voxel])
124199
end
125200
return nothing
126201
end
127202
128203
function simulate_magnetization!(magnetization, ::CPUThreads, sequence, parameters)
129-
vd = length(size(magnetization))
204+
# Parallel execution using Julia threads: Each thread processes voxels independently
205+
vd = length(size(magnetization)) # Get voxel dimension (last dimension)
130206
Threads.@threads for voxel ∈ eachindex(parameters)
207+
# Each thread initializes its own state vector (thread-safe)
131208
state = initialize_states(CPUThreads(), sequence)
209+
# Simulate and store result in the voxel's slice of the output array
132210
simulate_magnetization!(selectdim(magnetization, vd, voxel), sequence, state, parameters[voxel])
133211
end
134212
return nothing
135213
end
136214
137215
function simulate_magnetization!(magnetization, ::CPUProcesses, sequence, parameters::DArray)
138-
# Spawn tasks on each worker
216+
# Distributed execution across workers: Each worker processes its local partition
217+
# The `:lp` selector accesses the local partition of the DArray on each worker
139218
@sync [@spawnat p simulate_magnetization!(magnetization[:lp], CPU1(), sequence, parameters[:lp]) for p in workers()]
140219
return nothing
141220
end
142221
143222
function simulate_magnetization!(magnetization, ::CUDALibs, sequence, parameters)
144-
145-
# Each voxel gets assigned `WARPSIZE` threads. Since `THREADS_PER_BLOCK `is fixed,
146-
# the number of required blocks can then be calculated
223+
# GPU execution: Launch CUDA kernel with multiple threads per voxel
224+
# Each voxel gets assigned `WARPSIZE` threads for coalesced memory access
225+
# Since `THREADS_PER_BLOCK` is fixed, the number of required blocks is calculated
147226
nr_voxels = length(parameters)
148227
nr_blocks = cld(nr_voxels * WARPSIZE, THREADS_PER_BLOCK)
149228
150-
# define kernel
229+
# Define kernel function (executed on GPU)
151230
magnetization_kernel!(magnetization, sequence, parameters) = begin
152231
153-
# get voxel index
232+
# Calculate voxel index from block and thread indices
154233
voxel = cld((blockIdx().x - Int32(1)) * blockDim().x + threadIdx().x, WARPSIZE)
155234
156-
# # initialize state that gets updated during time integration
235+
# Initialize state that gets updated during time integration
157236
states = initialize_states(CUDALibs(), sequence)
158237
159-
# do nothing if voxel index is out of bounds
238+
# Do nothing if voxel index is out of bounds (extra threads in last block)
160239
if voxel > length(parameters)
161240
return nothing
162241
end
163242
164-
# run simulation for voxel
243+
# Run simulation for this voxel
165244
simulate_magnetization!(
166245
view(magnetization, :, voxel),
167246
sequence,
@@ -170,21 +249,28 @@ function simulate_magnetization!(magnetization, ::CUDALibs, sequence, parameters
170249
)
171250
end
172251
252+
# Launch kernel and synchronize
173253
CUDA.@sync begin
174254
@cuda blocks = nr_blocks threads = THREADS_PER_BLOCK magnetization_kernel!(magnetization, sequence, parameters)
175255
end
176256
return nothing
177257
end
178258
259+
#=========================================================================================
260+
# INTERNAL UTILITIES
261+
#
262+
# Helper functions for memory allocation and array management.
263+
=========================================================================================#
264+
179265
"""
180266
_allocate_magnetization_array(resource, sequence, parameters)
181267
182268
Allocate an array to store the output of the Bloch simulations (per voxel, echo times only)
183269
to be performed with the `sequence`. For each `BlochSimulator`, methods should have been
184270
added to `output_eltype` and `output_size` for this function to work properly.
185271
186-
This function is called by `simulate_magnetization` and is not intended considered part of
187-
te public API.
272+
This function is called by `simulate_magnetization` and is not considered part of the
273+
public API.
188274
189275
# Returns
190276
- `magnetization_array`: An array allocated on the specified `resource`, formatted to store
@@ -206,8 +292,8 @@ Allocate an array on the specified `resource` with the given element type `_elty
206292
`resource` is `CUDALibs()`, the array is allocated on the GPU. For `CPUProcesses()`, the
207293
array is distributed in the "voxel"-dimension over multiple CPU workers.
208294
209-
This function is called by `_allocate_magnetization_array` and is not intended considered
210-
part of te public API.
295+
This function is called by `_allocate_magnetization_array` and is not considered part of
296+
the public API.
211297
"""
212298
function _allocate_array_on_resource(::Union{CPU1,CPUThreads}, _eltype, _size)
213299
return zeros(_eltype, _size...)

0 commit comments

Comments
 (0)