diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8624c38f..c14eb8c3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,7 +6,7 @@ on: pull_request: jobs: test: - name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + name: Julia ${{ matrix.version }} - ${{ matrix.devitoversion }} - ${{ github.event_name }} runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -22,6 +22,14 @@ jobs: - ubuntu-latest arch: - x64 + + env: + DEVITO_LANGUAGE: "openmp" + DEVITO_ARCH: "gcc" + OMP_NUM_THREADS: "2" + DEVITO_AUTOPADDING: "0" + RDMAV_FORK_SAFE: 1 + steps: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v2 @@ -30,16 +38,17 @@ jobs: arch: ${{ matrix.arch }} - uses: julia-actions/cache@v1 - - name: install mpi - run: sudo apt-get update - - run: sudo apt-get install -y mpich libmpich-dev + - name: Setup MPI + uses: mpi4py/setup-mpi@v1 + with: + mpi: 'openmpi' - name: download miniconda manually run: wget https://repo.anaconda.com/miniconda/Miniconda3-py312_24.5.0-0-Linux-x86_64.sh - + - name: run install run: bash Miniconda3-py312_24.5.0-0-Linux-x86_64.sh -b -p $HOME/miniconda - + - name: run path export run: | echo "$HOME/miniconda/bin:" >> $GITHUB_PATH @@ -47,26 +56,30 @@ jobs: echo "PYCALL_JL_RUNTIME_PYTHON=$HOME/miniconda/bin/python3" >> $GITHUB_ENV echo "CONDA_EXE=$HOME/miniconda/bin/conda" >> $GITHUB_ENV - - name: use system MPI - run: julia -e 'using Pkg; Pkg.add(["MPI", "MPIPreferences"]); using MPIPreferences; MPIPreferences.use_system_binary()' - - name: run build for devito/devitopro if: matrix.devitoversion == 'devitopro' run: julia --color=yes --project -e 'using Pkg; Pkg.build(verbose=true)' env: DEVITO_PRO: ${{ secrets.DEVITOPRO }} - + - name: run build for devito if: ! matrix.devitoversion != 'devitopro' run: julia --color=yes --project -e 'using Pkg; Pkg.build(verbose=true)' env: DEVITO_BRANCH: ${{ matrix.devitoversion }} - + + - name: use system MPI + run: | + # https://juliaparallel.org/MPI.jl/latest/configuration/#Configuration-of-the-MPI.jl-testsuite + julia --project=test -e 'using Pkg; Pkg.instantiate(); Pkg.status(); using MPIPreferences; MPIPreferences.use_system_binary()' + # note Pkg.test docs indicate --inline=no can improve coverage - - run: julia --color=yes --check-bounds=yes --inline=no --project -e 'using Pkg; Pkg.test(coverage=true)' - + - name: run tests + run: | + julia --color=yes --check-bounds=yes --inline=no --project -e 'using Pkg; Pkg.test(coverage=true)' + - uses: julia-actions/julia-processcoverage@v1 - + - uses: codecov/codecov-action@v1 with: file: lcov.info @@ -79,17 +92,17 @@ jobs: - uses: julia-actions/setup-julia@v2 with: version: '1' - + - name: install mpi run: sudo apt-get update - run: sudo apt-get install -y mpich - + - name: download miniconda manually run: wget https://repo.anaconda.com/miniconda/Miniconda3-py312_24.5.0-0-Linux-x86_64.sh - + - name: run install run: bash Miniconda3-py312_24.5.0-0-Linux-x86_64.sh -b -p ~/miniconda - + - name: run path export run: | echo "$HOME/miniconda/bin:" >> $GITHUB_PATH @@ -97,11 +110,11 @@ jobs: echo "PYCALL_JL_RUNTIME_PYTHON=$HOME/miniconda/bin/python3" >> $GITHUB_ENV echo "CONDA_EXE=$HOME/miniconda/bin/conda" >> $GITHUB_ENV - - run: julia --color=yes --project -e 'using Pkg; if VERSION >= v"1.1.0-rc1"; Pkg.build(verbose=true); else Pkg.build(); end' + - run: julia --color=yes --project -e 'using Pkg; Pkg.build()' - name: Install dependencies run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - + - run: | julia --project=docs -e ' using Pkg diff --git a/Project.toml b/Project.toml index 4fc9fad7..ad9bff5f 100644 --- a/Project.toml +++ b/Project.toml @@ -4,12 +4,17 @@ authors = ["Sam Kaplan "] version = "1.0.0" [deps] -MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" Strided = "5e0ebb24-38b0-5f93-81fe-25c709ecae67" +[weakdeps] +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" + +[extensions] +MPIExt = "MPI" + [compat] -MPI = "0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20" +MPI = "0.20" PyCall = "1" Strided = "1" julia = "1.9" diff --git a/ext/MPIExt.jl b/ext/MPIExt.jl new file mode 100644 index 00000000..0339000e --- /dev/null +++ b/ext/MPIExt.jl @@ -0,0 +1,454 @@ +module MPIExt + +using MPI + +using Devito +using Devito.PyCall +using Devito.Strided + +import Devito: DiscreteFunction, TimeFunction, SparseFunction, SparseTimeFunction, SubFunction, SparseDiscreteFunction +import Devito: DevitoMPITrue, Function, inhalo, size_with_inhalo, halo, mycoords, topology, decomposition, parent +import Devito: localmask, localmask_with_halo, localmask_with_inhalo, decomposition_with_halo +import Devito: localindices, localindices_with_halo, localindices_with_inhalo +import Devito: data_allocated, data, data_with_halo, data_with_inhalo + +abstract type DevitoMPIAbstractArray{T,N} <: AbstractArray{T,N} end + +Base.parent(x::DevitoMPIAbstractArray) = x.p +localsize(x::DevitoMPIAbstractArray{T,N}) where {T,N} = ntuple(i->size(x.local_indices[i])[1], N) +localindices(x::DevitoMPIAbstractArray{T,N}) where {T,N} = x.local_indices +decomposition(x::DevitoMPIAbstractArray) = x.decomposition +topology(x::DevitoMPIAbstractArray) = x.topology + + +function _size_from_local_indices(local_indices::NTuple{N,UnitRange{Int64}}) where {N} + n = Devito.ntuple(i->(size(local_indices[i])[1] > 0 ? local_indices[i][end] : 0), N) + MPI.Allreduce(n, max, MPI.COMM_WORLD) +end + +Base.size(x::DevitoMPIAbstractArray) = x.size + +function counts(x::DevitoMPIAbstractArray) + [count(x, mycoords) for mycoords in CartesianIndices(topology(x))][:] +end + +function Base.fill!(x::DevitoMPIAbstractArray, v) + parent(x) .= v + MPI.Barrier(MPI.COMM_WORLD) + x +end + +Base.IndexStyle(::Type{<:DevitoMPIAbstractArray}) = IndexCartesian() + +struct DevitoMPIArray{T,N,A<:AbstractArray{T,N},D} <: DevitoMPIAbstractArray{T,N} + o::PyObject + p::A + local_indices::NTuple{N,UnitRange{Int}} + decomposition::D + topology::NTuple{N,Int} + size::NTuple{N,Int} +end + +function DevitoMPIArray{T,N}(o, idxs, decomp::D, topo) where {T,N,D} + p = unsafe_wrap(Array{T,N}, Ptr{T}(o.__array_interface__["data"][1]), length.(idxs); own=false) + n = _size_from_local_indices(idxs) + DevitoMPIArray{T,N,Array{T,N},D}(o, p, idxs, decomp, topo, n) +end + +function Base.convert(::Type{Array}, x::DevitoMPIAbstractArray{T,N}) where {T,N} + local y + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + y = zeros(T, length(x)) + y_vbuffer = VBuffer(y, counts(x)) + else + y = Array{T}(undef, ntuple(_->0, N)) + y_vbuffer = VBuffer(nothing) + end + + _x = zeros(T, size(parent(x))) + + copyto!(_x, parent(x)) + MPI.Gatherv!(_x, y_vbuffer, 0, MPI.COMM_WORLD) + + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + _y = Devito.convert_resort_array!(Array{T,N}(undef, size(x)), y, x.topology, x.decomposition) + else + _y = Array{T,N}(undef, ntuple(_->0, N)) + end + _y +end + + +function Base.copyto!(dst::DevitoMPIArray{T,N}, src::AbstractArray{T,N}) where {T,N} + _counts = counts(dst) + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + _y = Devito.copyto_resort_array!(Vector{T}(undef, length(src)), src, dst.topology, dst.decomposition) + data_vbuffer = VBuffer(_y, _counts) + else + data_vbuffer = VBuffer(nothing) + end + + _dst = MPI.Scatterv!(data_vbuffer, Vector{T}(undef, _counts[MPI.Comm_rank(MPI.COMM_WORLD)+1]), 0, MPI.COMM_WORLD) + copyto!(parent(dst), _dst) +end + +struct DevitoMPITimeArray{T,N,A<:AbstractArray{T,N},NM1,D} <: DevitoMPIAbstractArray{T,N} + o::PyObject + p::A + local_indices::NTuple{N,UnitRange{Int}} + decomposition::D + topology::NTuple{NM1,Int} + size::NTuple{N,Int} +end + +function DevitoMPITimeArray{T,N}(o, idxs, decomp::D, topo::NTuple{NM1,Int}) where {T,N,D,NM1} + p = unsafe_wrap(Array{T,N}, Ptr{T}(o.__array_interface__["data"][1]), length.(idxs); own=false) + n = _size_from_local_indices(idxs) + DevitoMPITimeArray{T,N,Array{T,N},NM1,D}(o, p, idxs, decomp, topo, n) +end + +function Base.convert(::Type{Array}, x::DevitoMPITimeArray{T,N}) where {T,N} + local y + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + y = zeros(T, length(x)) + y_vbuffer = VBuffer(y, counts(x)) + else + y = Vector{T}(undef, 0) + y_vbuffer = VBuffer(nothing) + end + MPI.Gatherv!(convert(Array, parent(x)), y_vbuffer, 0, MPI.COMM_WORLD) + + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + _y = Devito.convert_resort_array!(Array{T,N}(undef, size(x)), y, x.topology, x.decomposition) + else + _y = zeros(T, ntuple(_->0, N)) + end + + _y +end + +function Base.copy!(dst::DevitoMPIAbstractArray, src::AbstractArray) + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + axes(dst) == axes(src) || throw(ArgumentError( + "arrays must have the same axes for copy! (consider using `copyto!`)")) + end + copyto!(dst, src) +end + +function Base.copy!(dst::DevitoMPIAbstractArray{T,1}, src::AbstractVector) where {T} + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + axes(dst) == axes(src) || throw(ArgumentError( + "arrays must have the same axes for copy! (consider using `copyto!`)")) + end + copyto!(dst, src) +end + +function Base.copyto!(dst::DevitoMPITimeArray{T,N}, src::AbstractArray{T,N}) where {T,N} + _counts = counts(dst) + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + _y = Devito.copyto_resort_array!(Vector{T}(undef, length(src)), src, dst.topology, dst.decomposition) + data_vbuffer = VBuffer(_y, _counts) + else + data_vbuffer = VBuffer(nothing) + end + + _dst = MPI.Scatterv!(data_vbuffer, Vector{T}(undef, _counts[MPI.Comm_rank(MPI.COMM_WORLD)+1]), 0, MPI.COMM_WORLD) + copyto!(parent(dst), _dst) +end + +struct DevitoMPISparseTimeArray{T,N,NM1,D} <: DevitoMPIAbstractArray{T,NM1} + o::PyObject + p::Array{T,NM1} + local_indices::NTuple{NM1,Vector{Int}} + decomposition::D + topology::NTuple{NM1,Int} + size::NTuple{NM1,Int} +end + +function DevitoMPISparseTimeArray{T,N}(o, idxs, decomp::D, topo::NTuple{NM1,Int}) where {T,N,D,NM1} + local p + if length(idxs) == 0 + p = Array{T,N}(undef, ntuple(_->0, N)) + else + p = unsafe_wrap(Array{T,N}, Ptr{T}(o.__array_interface__["data"][1]), length.(idxs); own=false) + end + DevitoMPISparseTimeArray{T,N,NM1,D}(o, p, idxs, decomp, topo, globalsize(decomp)) +end + +localsize(x::DevitoMPISparseTimeArray) = length.(x.local_indices) + + +struct DevitoMPISparseArray{T,N,NM1,D} <: DevitoMPIAbstractArray{T,N} + o::PyObject + p::Array{T,NM1} + local_indices::NTuple{NM1,Vector{Int}} + decomposition::D + topology::NTuple{NM1,Int} + size::NTuple{NM1,Int} +end + +function DevitoMPISparseArray{T,N}(o, idxs, decomp::D, topo::NTuple{NM1,Int}) where {T,N,D,NM1} + local p + if prod(length.(idxs)) == 0 + p = Array{T,N}(undef, ntuple(_->0, N)) + else + p = unsafe_wrap(Array{T,N}, Ptr{T}(o.__array_interface__["data"][1]), length.(idxs); own=false) + end + DevitoMPISparseArray{T,N,NM1,D}(o, p, idxs, decomp, topo, globalsize(decomp)) +end + +localsize(x::DevitoMPISparseArray) = length.(x.local_indices) + +globalsize(decomp) = ntuple( i -> max(cat(decomp[i]..., dims=1)...) - min(cat(decomp[i]..., dims=1)...) + 1 , length(decomp)) + +function count(x::Union{DevitoMPIArray,DevitoMPITimeArray,DevitoMPISparseArray,DevitoMPISparseTimeArray}, mycoords) + d = decomposition(x) + n = size(x) + mapreduce(idim->d[idim] === nothing ? n[idim] : length(d[idim][mycoords[idim]]), *, 1:length(d)) +end + +function Base.convert(::Type{Array}, x::Union{DevitoMPISparseTimeArray{T,N},DevitoMPISparseArray{T,N}}) where {T,N} + local y + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + y = zeros(T, length(x)) + y_vbuffer = VBuffer(y, counts(x)) + else + y = Array{T,N}(undef, ntuple(_->0, N)) + y_vbuffer = VBuffer(nothing) + end + _x = zeros(T, size(parent(x))) + copyto!(_x, parent(x)) + MPI.Gatherv!(_x, y_vbuffer, 0, MPI.COMM_WORLD) + + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + _y = Devito.convert_resort_array!(Array{T,N}(undef, size(x)), y, x.topology, x.decomposition) + else + _y = Array{T,N}(undef, ntuple(_->0, N)) + end + _y +end + +function Base.copyto!(dst::Union{DevitoMPISparseTimeArray{T,N},DevitoMPISparseArray{T,N}}, src::Array{T,N}) where {T,N} + _counts = counts(dst) + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + _y = Devito.copyto_resort_array!(Vector{T}(undef, length(src)), src, dst.topology, dst.decomposition) + data_vbuffer = VBuffer(_y, _counts) + else + data_vbuffer = VBuffer(nothing) + end + _dst = MPI.Scatterv!(data_vbuffer, Vector{T}(undef, _counts[MPI.Comm_rank(MPI.COMM_WORLD)+1]), 0, MPI.COMM_WORLD) + copyto!(parent(dst), _dst) +end + + +function find_rank(x::DevitoMPIAbstractArray{T,N}, I::Vararg{Int,N}) where {T,N} + decomp = decomposition(x) + rank_position = Devito.in_range.(I,decomp) + helper = Devito.helix_helper(topology(x)) + rank = sum((rank_position .- 1) .* helper) + return rank +end + + +function Base.getindex(x::DevitoMPIAbstractArray{T,N}, I::Vararg{Int,N}) where {T,N} + v = nothing + wanted_rank = find_rank(x, I...) + if MPI.Comm_rank(MPI.COMM_WORLD) == wanted_rank + J = ntuple(idim-> Devito.shift_localindicies( I[idim], localindices(x)[idim]), N) + v = getindex(x.p, J...) + end + v = MPI.bcast(v, wanted_rank, MPI.COMM_WORLD) + v +end + +function Base.setindex!(x::DevitoMPIAbstractArray{T,N}, v::T, I::Vararg{Int,N}) where {T,N} + myrank = MPI.Comm_rank(MPI.COMM_WORLD) + if myrank == 0 + @warn "`setindex!` for Devito MPI Arrays has suboptimal performance. consider using `copy!`" + end + wanted_rank = find_rank(x, I...) + if wanted_rank == 0 + received_v = v + else + message_tag = 2*MPI.Comm_size(MPI.COMM_WORLD) + source_rank = 0 + send_mesg = [v] + recv_mesg = 0 .* send_mesg + rreq = ( myrank == wanted_rank ? MPI.Irecv!(recv_mesg, source_rank, message_tag, MPI.COMM_WORLD) : MPI.Request()) + sreq = ( myrank == source_rank ? MPI.Isend(send_mesg, wanted_rank, message_tag, MPI.COMM_WORLD) : MPI.Request() ) + stats = MPI.Waitall!([rreq, sreq]) + received_v = recv_mesg[1] + end + if myrank == wanted_rank + J = ntuple(idim-> Devito.shift_localindicies( I[idim], localindices(x)[idim]), N) + setindex!(x.p, received_v, J...) + end + MPI.Barrier(MPI.COMM_WORLD) +end + +Base.size(x::SparseDiscreteFunction{T,N,DevitoMPITrue}) where {T,N} = size(data(x)) + +function Devito.data(x::Function{T,N,DevitoMPITrue}) where {T,N} + p = sview(parent(data_allocated(x)), localmask(x)...) + d = decomposition(x) + t = topology(x) + idxs = localindices(x) + n = _size_from_local_indices(idxs) + DevitoMPIArray{T,N,typeof(p),typeof(d)}(x.o."_data_allocated", p, idxs, d, t, n) +end + +function Devito.data_with_halo(x::Function{T,N,DevitoMPITrue}) where {T,N} + p = sview(parent(data_allocated(x)), localmask_with_halo(x)...) + d = decomposition_with_halo(x) + t = topology(x) + idxs = localindices_with_halo(x) + n = _size_from_local_indices(idxs) + DevitoMPIArray{T,N,typeof(p),typeof(d)}(x.o."_data_allocated", p, idxs, d, t, n) +end + +function Devito.data_with_inhalo(x::Function{T,N,DevitoMPITrue}) where {T,N} + p = sview(parent(data_allocated(x)), localmask_with_inhalo(x)...) + d = decomposition_with_inhalo(x) + t = topology(x) + idxs = localindices_with_inhalo(x) + n = _size_from_local_indices(idxs) + DevitoMPIArray{T,N,typeof(p),typeof(d)}(x.o."_data_allocated", p, idxs, d, t, n) +end + +function data_allocated(x::Function{T,N,DevitoMPITrue}) where {T,N} + DevitoMPIArray{T,N}(x.o."_data_allocated", localindices_with_inhalo(x), decomposition(x), topology(x)) +end + +function Devito.data(x::TimeFunction{T,N,DevitoMPITrue}) where {T,N} + p = sview(parent(data_allocated(x)), localmask(x)...) + d = decomposition(x) + t = topology(x) + idxs = localindices(x) + n = _size_from_local_indices(idxs) + DevitoMPITimeArray{T,N,typeof(p),length(t),typeof(d)}(x.o."_data_allocated", p, idxs, d, t, n) +end + +function Devito.data_with_halo(x::TimeFunction{T,N,DevitoMPITrue}) where {T,N} + p = sview(parent(data_allocated(x)), localmask_with_halo(x)...) + d = decomposition_with_halo(x) + t = topology(x) + idxs = localindices_with_halo(x) + n = _size_from_local_indices(idxs) + DevitoMPITimeArray{T,N,typeof(p),length(t),typeof(d)}(x.o."_data_allocated", p, idxs, d, t, n) +end + +function Devito.data_with_inhalo(x::TimeFunction{T,N,DevitoMPITrue}) where {T,N} + p = sview(parent(data_allocated(x)), localmask_with_inhalo(x)...) + d = decomposition_with_inhalo(x) + t = topology(x) + idxs = localindices_with_inhalo(x) + n = _size_from_local_indices(idxs) + DevitoMPITimeArray{T,N,typeof(p),length(t),typeof(d)}(x.o."_data_allocated", p, idxs, d, t, n) +end + +function data_allocated(x::TimeFunction{T,N,DevitoMPITrue}) where {T,N} + DevitoMPITimeArray{T,N}(x.o."_data_allocated", localindices_with_inhalo(x), decomposition(x), topology(x)) +end + +function data_allocated(x::SubFunction{T,2,DevitoMPITrue}) where {T} + topo = (1, MPI.Comm_size(MPI.COMM_WORLD)) # topo is not defined for sparse decompositions + d = DevitoMPIArray{T,2}(x.o."_data_allocated", localindices(x), decomposition(x), topo) +end + +sparsetopo(x::Union{SparseFunction{T,N,DevitoMPITrue},SparseTimeFunction{T,N,DevitoMPITrue}}) where {T,N} = ntuple(i-> length(decomposition(x)[i]) > 1 ? MPI.Comm_size(MPI.COMM_WORLD) : 1, N) + +localindxhelper(x) = length(x) > 1 ? x[MPI.Comm_rank(MPI.COMM_WORLD)+1] : x[1] + +sparseindices(x::Union{SparseFunction{T,N,DevitoMPITrue},SparseTimeFunction{T,N,DevitoMPITrue}}) where {T,N} = localindxhelper.(decomposition(x)) + +function Devito.data_with_inhalo(x::SparseFunction{T,N,DevitoMPITrue}) where {T,N} + d = DevitoMPISparseArray{T,N}(x.o."_data_allocated", sparseindices(x), decomposition(x), sparsetopo(x)) + MPI.Barrier(MPI.COMM_WORLD) + d +end + +# TODO - needed? <-- +function Devito.data_with_inhalo(x::SparseTimeFunction{T,N,DevitoMPITrue}) where {T,N} + d = DevitoMPISparseTimeArray{T,N}(x.o."_data_allocated", sparseindices(x), decomposition(x), sparsetopo(x)) + MPI.Barrier(MPI.COMM_WORLD) + d +end + + +function localindices(x::DiscreteFunction{T,N,DevitoMPITrue}) where {T,N} + localinds = PyCall.trygetproperty(x.o,"local_indices",nothing) + if localinds === nothing + return ntuple(i -> 0:-1, N) + else + return ntuple(i->convert(Int,localinds[N-i+1].start)+1:convert(Int,localinds[N-i+1].stop), N) + end +end + + +function decomposition_with_inhalo(x::DiscreteFunction{T,N,DevitoMPITrue}) where {T,N} + _decomposition = Devito.getdecomp(x) + h = inhalo(x) + ntuple( + idim->begin + if _decomposition[idim] === nothing + nothing + else + M = length(_decomposition[idim]) + ntuple( + ipart->begin + n = length(_decomposition[idim][ipart]) + strt = _decomposition[idim][ipart][1] + (h[idim][1] + h[idim][2])*(ipart-1) + 1 + stop = _decomposition[idim][ipart][end] + (h[idim][1] + h[idim][2])*ipart + 1 + [strt:stop;] + end, + M + ) + end + end, + N + ) +end + +function localindices_with_inhalo(x::DiscreteFunction{T,N,DevitoMPITrue}) where {T,N} + h = inhalo(x) + localidxs = localindices(x) + n = size_with_inhalo(x) + _mycoords = mycoords(x) + _decomposition = decomposition(x) + + ntuple(idim->begin + local strt,stop + if _decomposition[idim] == nothing + strt = 1 + stop = n[idim] + else + strt = localidxs[idim][1] + (_mycoords[idim]-1)*(h[idim][1] + h[idim][2]) + stop = strt + length(localidxs[idim]) - 1 + h[idim][1] + h[idim][2] + end + strt:stop + end, N) +end + +function localindices_with_halo(x::DiscreteFunction{T,N,DevitoMPITrue}) where {T,N} + h = halo(x) + localidxs = localindices(x) + n = size_with_halo(x) + + _mycoords = mycoords(x) + _topology = topology(x) + _decomposition = decomposition(x) + + ntuple(idim->begin + local strt,stop + if _decomposition[idim] == nothing + strt = 1 + stop = n[idim] + else + strt = _mycoords[idim] == 1 ? localidxs[idim][1] : localidxs[idim][1] + h[idim][1] + stop = _mycoords[idim] == _topology[idim] ? localidxs[idim][end] + h[idim][1] + h[idim][2] : localidxs[idim][end] + h[idim][1] + end + strt:stop + end, N) +end + +end \ No newline at end of file diff --git a/src/Devito.jl b/src/Devito.jl index 3aff04b6..daef12d5 100644 --- a/src/Devito.jl +++ b/src/Devito.jl @@ -1,6 +1,6 @@ module Devito -using MPI, PyCall, Strided +using PyCall, Strided const numpy = PyNULL() const sympy = PyNULL() @@ -102,6 +102,9 @@ function _numpy_eltype(dtype) end end +abstract type DevitoMPI end +struct DevitoMPIFalse <: DevitoMPI end +struct DevitoMPITrue <: DevitoMPI end """ configuration!(key, value) @@ -160,84 +163,6 @@ Base.IndexStyle(::Type{<:DevitoArray{<:Any,<:Any,<:StridedView}}) = IndexCartesi Base.view(x::DevitoArray{T,N,Array{T,N}}, I::Vararg{Any}) where {T,N} = DevitoArray(x.o, sview(x.p, I...)) -abstract type DevitoMPIAbstractArray{T,N} <: AbstractArray{T,N} end - -Base.parent(x::DevitoMPIAbstractArray) = x.p -localsize(x::DevitoMPIAbstractArray{T,N}) where {T,N} = ntuple(i->size(x.local_indices[i])[1], N) -localindices(x::DevitoMPIAbstractArray{T,N}) where {T,N} = x.local_indices -decomposition(x::DevitoMPIAbstractArray) = x.decomposition -topology(x::DevitoMPIAbstractArray) = x.topology - -function _size_from_local_indices(local_indices::NTuple{N,UnitRange{Int64}}) where {N} - n = ntuple(i->(size(local_indices[i])[1] > 0 ? local_indices[i][end] : 0), N) - MPI.Allreduce(n, max, MPI.COMM_WORLD) -end - -Base.size(x::DevitoMPIAbstractArray) = x.size - -function counts(x::DevitoMPIAbstractArray) - [count(x, mycoords) for mycoords in CartesianIndices(topology(x))][:] -end - -function Base.fill!(x::DevitoMPIAbstractArray, v) - parent(x) .= v - MPI.Barrier(MPI.COMM_WORLD) - x -end - -Base.IndexStyle(::Type{<:DevitoMPIAbstractArray}) = IndexCartesian() - -struct DevitoMPIArray{T,N,A<:AbstractArray{T,N},D} <: DevitoMPIAbstractArray{T,N} - o::PyObject - p::A - local_indices::NTuple{N,UnitRange{Int}} - decomposition::D - topology::NTuple{N,Int} - size::NTuple{N,Int} -end - -function DevitoMPIArray{T,N}(o, idxs, decomp::D, topo) where {T,N,D} - p = unsafe_wrap(Array{T,N}, Ptr{T}(o.__array_interface__["data"][1]), length.(idxs); own=false) - n = _size_from_local_indices(idxs) - DevitoMPIArray{T,N,Array{T,N},D}(o, p, idxs, decomp, topo, n) -end - -function convert_resort_array!(_y::Array{T,N}, y::Vector{T}, topology, decomposition) where {T,N} - i = 1 - for block_idx in CartesianIndices(topology) - idxs = CartesianIndices(ntuple(idim->decomposition[idim] === nothing ? size(_y, idim) : length(decomposition[idim][block_idx.I[idim]]), N)) - for _idx in idxs - idx = CartesianIndex(ntuple(idim->decomposition[idim] === nothing ? _idx.I[idim] : decomposition[idim][block_idx.I[idim]][_idx.I[idim]], N)) - _y[idx] = y[i] - i += 1 - end - end - _y -end - -function Base.convert(::Type{Array}, x::DevitoMPIAbstractArray{T,N}) where {T,N} - local y - if MPI.Comm_rank(MPI.COMM_WORLD) == 0 - y = zeros(T, length(x)) - y_vbuffer = VBuffer(y, counts(x)) - else - y = Array{T}(undef, ntuple(_->0, N)) - y_vbuffer = VBuffer(nothing) - end - - _x = zeros(T, size(parent(x))) - - copyto!(_x, parent(x)) - MPI.Gatherv!(_x, y_vbuffer, 0, MPI.COMM_WORLD) - - if MPI.Comm_rank(MPI.COMM_WORLD) == 0 - _y = convert_resort_array!(Array{T,N}(undef, size(x)), y, x.topology, x.decomposition) - else - _y = Array{T,N}(undef, ntuple(_->0, N)) - end - _y -end - function copyto_resort_array!(_y::Vector{T}, y::Array{T,N}, topology, decomposition) where {T,N} i = 1 for block_idx in CartesianIndices(topology) @@ -251,176 +176,6 @@ function copyto_resort_array!(_y::Vector{T}, y::Array{T,N}, topology, decomposit _y end -function Base.copyto!(dst::DevitoMPIArray{T,N}, src::AbstractArray{T,N}) where {T,N} - _counts = counts(dst) - if MPI.Comm_rank(MPI.COMM_WORLD) == 0 - _y = copyto_resort_array!(Vector{T}(undef, length(src)), src, dst.topology, dst.decomposition) - data_vbuffer = VBuffer(_y, _counts) - else - data_vbuffer = VBuffer(nothing) - end - - _dst = MPI.Scatterv!(data_vbuffer, Vector{T}(undef, _counts[MPI.Comm_rank(MPI.COMM_WORLD)+1]), 0, MPI.COMM_WORLD) - copyto!(parent(dst), _dst) -end - -struct DevitoMPITimeArray{T,N,A<:AbstractArray{T,N},NM1,D} <: DevitoMPIAbstractArray{T,N} - o::PyObject - p::A - local_indices::NTuple{N,UnitRange{Int}} - decomposition::D - topology::NTuple{NM1,Int} - size::NTuple{N,Int} -end - -function DevitoMPITimeArray{T,N}(o, idxs, decomp::D, topo::NTuple{NM1,Int}) where {T,N,D,NM1} - p = unsafe_wrap(Array{T,N}, Ptr{T}(o.__array_interface__["data"][1]), length.(idxs); own=false) - n = _size_from_local_indices(idxs) - DevitoMPITimeArray{T,N,Array{T,N},NM1,D}(o, p, idxs, decomp, topo, n) -end - -function Base.convert(::Type{Array}, x::DevitoMPITimeArray{T,N}) where {T,N} - local y - if MPI.Comm_rank(MPI.COMM_WORLD) == 0 - y = zeros(T, length(x)) - y_vbuffer = VBuffer(y, counts(x)) - else - y = Vector{T}(undef, 0) - y_vbuffer = VBuffer(nothing) - end - MPI.Gatherv!(convert(Array, parent(x)), y_vbuffer, 0, MPI.COMM_WORLD) - - if MPI.Comm_rank(MPI.COMM_WORLD) == 0 - _y = convert_resort_array!(Array{T,N}(undef, size(x)), y, x.topology, x.decomposition) - else - _y = zeros(T, ntuple(_->0, N)) - end - - _y -end - -function Base.copy!(dst::DevitoMPIAbstractArray, src::AbstractArray) - if MPI.Comm_rank(MPI.COMM_WORLD) == 0 - axes(dst) == axes(src) || throw(ArgumentError( - "arrays must have the same axes for copy! (consider using `copyto!`)")) - end - copyto!(dst, src) -end - -function Base.copy!(dst::DevitoMPIAbstractArray{T,1}, src::AbstractVector) where {T} - if MPI.Comm_rank(MPI.COMM_WORLD) == 0 - axes(dst) == axes(src) || throw(ArgumentError( - "arrays must have the same axes for copy! (consider using `copyto!`)")) - end - copyto!(dst, src) -end - -function Base.copyto!(dst::DevitoMPITimeArray{T,N}, src::AbstractArray{T,N}) where {T,N} - _counts = counts(dst) - if MPI.Comm_rank(MPI.COMM_WORLD) == 0 - _y = copyto_resort_array!(Vector{T}(undef, length(src)), src, dst.topology, dst.decomposition) - data_vbuffer = VBuffer(_y, _counts) - else - data_vbuffer = VBuffer(nothing) - end - - _dst = MPI.Scatterv!(data_vbuffer, Vector{T}(undef, _counts[MPI.Comm_rank(MPI.COMM_WORLD)+1]), 0, MPI.COMM_WORLD) - copyto!(parent(dst), _dst) -end - -struct DevitoMPISparseTimeArray{T,N,NM1,D} <: DevitoMPIAbstractArray{T,NM1} - o::PyObject - p::Array{T,NM1} - local_indices::NTuple{NM1,Vector{Int}} - decomposition::D - topology::NTuple{NM1,Int} - size::NTuple{NM1,Int} -end - -function DevitoMPISparseTimeArray{T,N}(o, idxs, decomp::D, topo::NTuple{NM1,Int}) where {T,N,D,NM1} - local p - if length(idxs) == 0 - p = Array{T,N}(undef, ntuple(_->0, N)) - else - p = unsafe_wrap(Array{T,N}, Ptr{T}(o.__array_interface__["data"][1]), length.(idxs); own=false) - end - DevitoMPISparseTimeArray{T,N,NM1,D}(o, p, idxs, decomp, topo, globalsize(decomp)) -end - -localsize(x::DevitoMPISparseTimeArray) = length.(x.local_indices) - - -struct DevitoMPISparseArray{T,N,NM1,D} <: DevitoMPIAbstractArray{T,N} - o::PyObject - p::Array{T,NM1} - local_indices::NTuple{NM1,Vector{Int}} - decomposition::D - topology::NTuple{NM1,Int} - size::NTuple{NM1,Int} -end - -function DevitoMPISparseArray{T,N}(o, idxs, decomp::D, topo::NTuple{NM1,Int}) where {T,N,D,NM1} - local p - if prod(length.(idxs)) == 0 - p = Array{T,N}(undef, ntuple(_->0, N)) - else - p = unsafe_wrap(Array{T,N}, Ptr{T}(o.__array_interface__["data"][1]), length.(idxs); own=false) - end - DevitoMPISparseArray{T,N,NM1,D}(o, p, idxs, decomp, topo, globalsize(decomp)) -end - -localsize(x::DevitoMPISparseArray) = length.(x.local_indices) - -globalsize(decomp) = ntuple( i -> max(cat(decomp[i]..., dims=1)...) - min(cat(decomp[i]..., dims=1)...) + 1 , length(decomp)) - -function count(x::Union{DevitoMPIArray,DevitoMPITimeArray,DevitoMPISparseArray,DevitoMPISparseTimeArray}, mycoords) - d = decomposition(x) - n = size(x) - mapreduce(idim->d[idim] === nothing ? n[idim] : length(d[idim][mycoords[idim]]), *, 1:length(d)) -end - -function Base.convert(::Type{Array}, x::Union{DevitoMPISparseTimeArray{T,N},DevitoMPISparseArray{T,N}}) where {T,N} - local y - if MPI.Comm_rank(MPI.COMM_WORLD) == 0 - y = zeros(T, length(x)) - y_vbuffer = VBuffer(y, counts(x)) - else - y = Array{T,N}(undef, ntuple(_->0, N)) - y_vbuffer = VBuffer(nothing) - end - _x = zeros(T, size(parent(x))) - copyto!(_x, parent(x)) - MPI.Gatherv!(_x, y_vbuffer, 0, MPI.COMM_WORLD) - - if MPI.Comm_rank(MPI.COMM_WORLD) == 0 - _y = convert_resort_array!(Array{T,N}(undef, size(x)), y, x.topology, x.decomposition) - else - _y = Array{T,N}(undef, ntuple(_->0, N)) - end - _y -end - -function Base.copyto!(dst::Union{DevitoMPISparseTimeArray{T,N},DevitoMPISparseArray{T,N}}, src::Array{T,N}) where {T,N} - _counts = counts(dst) - if MPI.Comm_rank(MPI.COMM_WORLD) == 0 - _y = copyto_resort_array!(Vector{T}(undef, length(src)), src, dst.topology, dst.decomposition) - data_vbuffer = VBuffer(_y, _counts) - else - data_vbuffer = VBuffer(nothing) - end - _dst = MPI.Scatterv!(data_vbuffer, Vector{T}(undef, _counts[MPI.Comm_rank(MPI.COMM_WORLD)+1]), 0, MPI.COMM_WORLD) - copyto!(parent(dst), _dst) -end - -function in_range(i::Int, ranges) - for rang in enumerate(ranges) - if i ∈ rang[2] - return rang[1] - end - end - error("Outside Valid Ranges") -end - function helix_helper(tup::NTuple{N,Int}) where {N} wrapper = (1,) for i in 2:N @@ -429,54 +184,10 @@ function helix_helper(tup::NTuple{N,Int}) where {N} return wrapper end -function find_rank(x::DevitoMPIAbstractArray{T,N}, I::Vararg{Int,N}) where {T,N} - decomp = decomposition(x) - rank_position = in_range.(I,decomp) - helper = helix_helper(topology(x)) - rank = sum((rank_position .- 1) .* helper) - return rank -end - shift_localindicies(i::Int, indices::Union{UnitRange{Int},Vector{Int}}) = i - indices[1] + 1 shift_localindicies(i::Int, indices::Int) = i - indices + 1 -function Base.getindex(x::DevitoMPIAbstractArray{T,N}, I::Vararg{Int,N}) where {T,N} - v = nothing - wanted_rank = find_rank(x, I...) - if MPI.Comm_rank(MPI.COMM_WORLD) == wanted_rank - J = ntuple(idim-> shift_localindicies( I[idim], localindices(x)[idim]), N) - v = getindex(x.p, J...) - end - v = MPI.bcast(v, wanted_rank, MPI.COMM_WORLD) - v -end - -function Base.setindex!(x::DevitoMPIAbstractArray{T,N}, v::T, I::Vararg{Int,N}) where {T,N} - myrank = MPI.Comm_rank(MPI.COMM_WORLD) - if myrank == 0 - @warn "`setindex!` for Devito MPI Arrays has suboptimal performance. consider using `copy!`" - end - wanted_rank = find_rank(x, I...) - if wanted_rank == 0 - received_v = v - else - message_tag = 2*MPI.Comm_size(MPI.COMM_WORLD) - source_rank = 0 - send_mesg = [v] - recv_mesg = 0 .* send_mesg - rreq = ( myrank == wanted_rank ? MPI.Irecv!(recv_mesg, source_rank, message_tag, MPI.COMM_WORLD) : MPI.Request()) - sreq = ( myrank == source_rank ? MPI.Isend(send_mesg, wanted_rank, message_tag, MPI.COMM_WORLD) : MPI.Request() ) - stats = MPI.Waitall!([rreq, sreq]) - received_v = recv_mesg[1] - end - if myrank == wanted_rank - J = ntuple(idim-> shift_localindicies( I[idim], localindices(x)[idim]), N) - setindex!(x.p, received_v, J...) - end - MPI.Barrier(MPI.COMM_WORLD) -end - # # Dimension # @@ -847,9 +558,6 @@ Base.:(==)(x::AbstractSubDomain,y::AbstractSubDomain) = x.o == y.o # # Functions # -abstract type DevitoMPI end -struct DevitoMPITrue <: DevitoMPI end -struct DevitoMPIFalse <: DevitoMPI end abstract type DiscreteFunction{T,N,M} end @@ -940,13 +648,12 @@ p = TimeFunction(name="p", grid=grid, time_order=2, space_order=8) # TimeFunction{T,N,M}(o) # end -function TimeFunction(args...; lazy=false, kwargs...) - if lazy +function TimeFunction(args...; lazy=false, allowpro=true, kwargs...) + if lazy & allowpro & has_devitopro() o = pycall(devitopro.TimeFunction, PyObject, args...; reversedims(kwargs)...) + elseif ~has_devitopro() | !allowpro + o = pycall(devito.TimeFunction, PyObject, args...; reversedims(kwargs)...) else - if ~has_devitopro() - o = pycall(devito.TimeFunction, PyObject, args...; reversedims(kwargs)...) - end # this is inelegant, TODO: find better way to handle layers. # Issue is that PyCall interpets the layers as tuple, eliminating key metadata. # TODO: Generate MFE and submit as issue to PyCall @@ -983,6 +690,18 @@ end str2serial(y::String) = utils."str2path"(y) +function convert_resort_array!(_y::Array{T,N}, y::Vector{T}, topology, decomposition) where {T,N} + i = 1 + for block_idx in CartesianIndices(topology) + idxs = CartesianIndices(ntuple(idim->decomposition[idim] === nothing ? size(_y, idim) : length(decomposition[idim][block_idx.I[idim]]), N)) + for _idx in idxs + idx = CartesianIndex(ntuple(idim->decomposition[idim] === nothing ? _idx.I[idim] : decomposition[idim][block_idx.I[idim]][_idx.I[idim]], N)) + _y[idx] = y[i] + i += 1 + end + end + _y +end abstract type SparseDiscreteFunction{T,N,M} <: DiscreteFunction{T,N,M} end @@ -1141,10 +860,17 @@ Return the size of the grid associated with `z`, inclusive the the Devito "inner """ size_with_inhalo(x::DiscreteFunction{T,N}) where {T,N} = reverse(x.o._shape_with_inhalo)::NTuple{N,Int} -Base.size(x::SparseDiscreteFunction{T,N,DevitoMPITrue}) where {T,N} = size(data(x)) - size_with_halo(x::SparseDiscreteFunction) = size(x) +function in_range(i::Int, ranges) + for rang in enumerate(ranges) + if i ∈ rang[2] + return rang[1] + end + end + error("Outside Valid Ranges") +end + localmask(x::DiscreteFunction{T,N}) where {T,N} = ntuple(i->convert(Int,x.o._mask_domain[N-i+1].start)+1:convert(Int,x.o._mask_domain[N-i+1].stop), N)::NTuple{N,UnitRange{Int}} localmask_with_halo(x::DiscreteFunction{T,N}) where {T,N} = ntuple(i->convert(Int,x.o._mask_outhalo[N-i+1].start)+1:convert(Int,x.o._mask_outhalo[N-i+1].stop), N)::NTuple{N,UnitRange{Int}} localmask_with_inhalo(x::DiscreteFunction{T,N}) where {T,N} = ntuple(i->convert(Int,x.o._mask_inhalo[N-i+1].start)+1:convert(Int,x.o._mask_inhalo[N-i+1].stop), N)::NTuple{N,UnitRange{Int}} @@ -1255,6 +981,12 @@ this also *collects* the data onto MPI rank 0. """ data_with_inhalo(x::DiscreteFunction{T,N,DevitoMPIFalse}) where {T,N} = view(data_allocated(x), localindices_with_inhalo(x)...) +function data_with_inhalo(x::SparseDiscreteFunction{T,N,DevitoMPIFalse}) where {T,N} + d = DevitoArray{T,N}(x.o."_data_allocated") + d +end + + """ data_allocated(x::DiscreteFunction) @@ -1267,15 +999,6 @@ this also *collects* the data onto MPI rank 0. """ data_allocated(x::DiscreteFunction{T,N,DevitoMPIFalse}) where {T,N} = DevitoArray{T,N}(x.o."_data_allocated") -function localindices(x::DiscreteFunction{T,N,DevitoMPITrue}) where {T,N} - localinds = PyCall.trygetproperty(x.o,"local_indices",nothing) - if localinds === nothing - return ntuple(i -> 0:-1, N) - else - return ntuple(i->convert(Int,localinds[N-i+1].start)+1:convert(Int,localinds[N-i+1].stop), N) - end -end - function one_based_decomposition(decomposition) for idim = 1:length(decomposition) if decomposition[idim] !== nothing @@ -1356,163 +1079,6 @@ end decomposition(x::DiscreteFunction) = one_based_decomposition(getdecomp(x)) decomposition_with_halo(x::DiscreteFunction) = one_based_decomposition(getdecompwithhalo(x)) -function decomposition_with_inhalo(x::DiscreteFunction{T,N,DevitoMPITrue}) where {T,N} - _decomposition = getdecomp(x) - h = inhalo(x) - ntuple( - idim->begin - if _decomposition[idim] === nothing - nothing - else - M = length(_decomposition[idim]) - ntuple( - ipart->begin - n = length(_decomposition[idim][ipart]) - strt = _decomposition[idim][ipart][1] + (h[idim][1] + h[idim][2])*(ipart-1) + 1 - stop = _decomposition[idim][ipart][end] + (h[idim][1] + h[idim][2])*ipart + 1 - [strt:stop;] - end, - M - ) - end - end, - N - ) -end - -function localindices_with_inhalo(x::DiscreteFunction{T,N,DevitoMPITrue}) where {T,N} - h = inhalo(x) - localidxs = localindices(x) - n = size_with_inhalo(x) - _mycoords = mycoords(x) - _decomposition = decomposition(x) - - ntuple(idim->begin - local strt,stop - if _decomposition[idim] == nothing - strt = 1 - stop = n[idim] - else - strt = localidxs[idim][1] + (_mycoords[idim]-1)*(h[idim][1] + h[idim][2]) - stop = strt + length(localidxs[idim]) - 1 + h[idim][1] + h[idim][2] - end - strt:stop - end, N) -end - -function localindices_with_halo(x::DiscreteFunction{T,N,DevitoMPITrue}) where {T,N} - h = halo(x) - localidxs = localindices(x) - n = size_with_halo(x) - - _mycoords = mycoords(x) - _topology = topology(x) - _decomposition = decomposition(x) - - ntuple(idim->begin - local strt,stop - if _decomposition[idim] == nothing - strt = 1 - stop = n[idim] - else - strt = _mycoords[idim] == 1 ? localidxs[idim][1] : localidxs[idim][1] + h[idim][1] - stop = _mycoords[idim] == _topology[idim] ? localidxs[idim][end] + h[idim][1] + h[idim][2] : localidxs[idim][end] + h[idim][1] - end - strt:stop - end, N) -end - -function data(x::Function{T,N,DevitoMPITrue}) where {T,N} - p = sview(parent(data_allocated(x)), localmask(x)...) - d = decomposition(x) - t = topology(x) - idxs = localindices(x) - n = _size_from_local_indices(idxs) - DevitoMPIArray{T,N,typeof(p),typeof(d)}(x.o."_data_allocated", p, idxs, d, t, n) -end - -function data_with_halo(x::Function{T,N,DevitoMPITrue}) where {T,N} - p = sview(parent(data_allocated(x)), localmask_with_halo(x)...) - d = decomposition_with_halo(x) - t = topology(x) - idxs = localindices_with_halo(x) - n = _size_from_local_indices(idxs) - DevitoMPIArray{T,N,typeof(p),typeof(d)}(x.o."_data_allocated", p, idxs, d, t, n) -end - -function data_with_inhalo(x::Function{T,N,DevitoMPITrue}) where {T,N} - p = sview(parent(data_allocated(x)), localmask_with_inhalo(x)...) - d = decomposition_with_inhalo(x) - t = topology(x) - idxs = localindices_with_inhalo(x) - n = _size_from_local_indices(idxs) - DevitoMPIArray{T,N,typeof(p),typeof(d)}(x.o."_data_allocated", p, idxs, d, t, n) -end - -function data_allocated(x::Function{T,N,DevitoMPITrue}) where {T,N} - DevitoMPIArray{T,N}(x.o."_data_allocated", localindices_with_inhalo(x), decomposition(x), topology(x)) -end - -function data(x::TimeFunction{T,N,DevitoMPITrue}) where {T,N} - p = sview(parent(data_allocated(x)), localmask(x)...) - d = decomposition(x) - t = topology(x) - idxs = localindices(x) - n = _size_from_local_indices(idxs) - DevitoMPITimeArray{T,N,typeof(p),length(t),typeof(d)}(x.o."_data_allocated", p, idxs, d, t, n) -end - -function data_with_halo(x::TimeFunction{T,N,DevitoMPITrue}) where {T,N} - p = sview(parent(data_allocated(x)), localmask_with_halo(x)...) - d = decomposition_with_halo(x) - t = topology(x) - idxs = localindices_with_halo(x) - n = _size_from_local_indices(idxs) - DevitoMPITimeArray{T,N,typeof(p),length(t),typeof(d)}(x.o."_data_allocated", p, idxs, d, t, n) -end - -function data_with_inhalo(x::TimeFunction{T,N,DevitoMPITrue}) where {T,N} - p = sview(parent(data_allocated(x)), localmask_with_inhalo(x)...) - d = decomposition_with_inhalo(x) - t = topology(x) - idxs = localindices_with_inhalo(x) - n = _size_from_local_indices(idxs) - DevitoMPITimeArray{T,N,typeof(p),length(t),typeof(d)}(x.o."_data_allocated", p, idxs, d, t, n) -end - -function data_allocated(x::TimeFunction{T,N,DevitoMPITrue}) where {T,N} - DevitoMPITimeArray{T,N}(x.o."_data_allocated", localindices_with_inhalo(x), decomposition(x), topology(x)) -end - -function data_allocated(x::SubFunction{T,2,DevitoMPITrue}) where {T} - topo = (1, MPI.Comm_size(MPI.COMM_WORLD)) # topo is not defined for sparse decompositions - d = DevitoMPIArray{T,2}(x.o."_data_allocated", localindices(x), decomposition(x), topo) -end - -sparsetopo(x::Union{SparseFunction{T,N,DevitoMPITrue},SparseTimeFunction{T,N,DevitoMPITrue}}) where {T,N} = ntuple(i-> length(decomposition(x)[i]) > 1 ? MPI.Comm_size(MPI.COMM_WORLD) : 1, N) - -localindxhelper(x) = length(x) > 1 ? x[MPI.Comm_rank(MPI.COMM_WORLD)+1] : x[1] - -sparseindices(x::Union{SparseFunction{T,N,DevitoMPITrue},SparseTimeFunction{T,N,DevitoMPITrue}}) where {T,N} = localindxhelper.(decomposition(x)) - -function data_with_inhalo(x::SparseFunction{T,N,DevitoMPITrue}) where {T,N} - d = DevitoMPISparseArray{T,N}(x.o."_data_allocated", sparseindices(x), decomposition(x), sparsetopo(x)) - MPI.Barrier(MPI.COMM_WORLD) - d -end - -# TODO - needed? <-- -function data_with_inhalo(x::SparseTimeFunction{T,N,DevitoMPITrue}) where {T,N} - d = DevitoMPISparseTimeArray{T,N}(x.o."_data_allocated", sparseindices(x), decomposition(x), sparsetopo(x)) - MPI.Barrier(MPI.COMM_WORLD) - d -end - -function data_with_inhalo(x::SparseDiscreteFunction{T,N,DevitoMPIFalse}) where {T,N} - d = DevitoArray{T,N}(x.o."_data_allocated") - d -end - data_with_halo(x::SparseDiscreteFunction{T,N,M}) where {T,N,M} = data_with_inhalo(x) data(x::SparseDiscreteFunction{T,N,M}) where {T,N,M} = data_with_inhalo(x) data(x::SubFunction{T,N,M}) where {T,N,M} = data_allocated(x) @@ -2274,8 +1840,8 @@ export Function, SparseFunction, SparseTimeFunction, SubDomain, TimeFunction, ap export backward, ccode, configuration, configuration!, switchconfig, coordinates, coordinates_data export data, data_allocated, data_with_halo, data_with_inhalo, dimension, dimensions export dx, dy, dz, evaluate, extent, forward, grid, halo, indexed, inject, interpolate -export localindices, localindices_with_halo, localindices_with_inhalo, localsize, name +export localindices, localindices_with_halo, localindices_with_inhalo, name export nsimplify, origin, size_with_halo, simplify, solve, space_order, spacing, spacing_map export step, subdomains, subs, thickness, value, value! -end +end \ No newline at end of file diff --git a/test/LocalPreferences.toml b/test/LocalPreferences.toml new file mode 100644 index 00000000..d970ab67 --- /dev/null +++ b/test/LocalPreferences.toml @@ -0,0 +1,9 @@ +[MPIPreferences] +__clear__ = ["preloads_env_switch"] +_format = "1.0" +abi = "OpenMPI" +binary = "system" +cclibs = [] +libmpi = "libmpi" +mpiexec = "mpiexec" +preloads = [] diff --git a/test/Project.toml b/test/Project.toml index 254bdf7e..cad73a3a 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,6 @@ [deps] MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Strided = "5e0ebb24-38b0-5f93-81fe-25c709ecae67" diff --git a/test/devitoprotests.jl b/test/devitoprotests.jl index 448a94f5..043e1b60 100644 --- a/test/devitoprotests.jl +++ b/test/devitoprotests.jl @@ -17,54 +17,60 @@ using Devito, PyCall, Test @test isequal(vp, Devito.vp(abox)) end -# 2024-08-15 JKW these two ABox tests are broken -- some kind of API change? -@testset "ABox Time Function" begin - g = Grid(shape=(5,5), extent=(4.0,4.0)) - nt = 3 - coords = [2. 2. ;] - space_order = 0 - vp = Devito.Function(name="vp", grid=g, space_order=space_order) - src = SparseTimeFunction(name="src", grid=g, nt=nt, npoint=size(coords)[1], coordinates=coords, space_order=0) - data(vp) .= 1.0 - dt = 1.0 - t = time_dim(g) - abox = ABox(src, nothing, vp, space_order) - # This needs layers = nothing as currently setup to automatically - # deffault to disk and buffered/saved functions cannot be used like that in an equation - u = TimeFunction(name="u", grid=g, save=nt, space_order=space_order, layers=nothing) - op = Operator([Eq(forward(u), t+1, subdomain=abox)]) - apply(op, dt=dt) - @test data(u)[:,:,1] ≈ zeros(Float32, 5 , 5) - @test data(u)[2:end-1,2:end-1,2] ≈ ones(Float32, 3, 3) - data(u)[2:end-1,2:end-1,2] .= 0 - @test data(u)[:,:,2] ≈ zeros(Float32, 5 , 5) - @test data(u)[:,:,3] ≈ 2 .* ones(Float32, 5 , 5) +# TODO (9/2/2025) - failing with decoupler, mloubout is looking into the issue +if get(ENV, "DEVITO_DECOUPLER", "0") != "1" + # TODO - 2024-08-15 JKW these two ABox tests are broken -- some kind of API change? + @testset "ABox Time Function" begin + g = Grid(shape=(5,5), extent=(4.0,4.0)) + nt = 3 + coords = [2. 2. ;] + space_order = 0 + vp = Devito.Function(name="vp", grid=g, space_order=space_order) + src = SparseTimeFunction(name="src", grid=g, nt=nt, npoint=size(coords)[1], coordinates=coords, space_order=0) + data(vp) .= 1.0 + dt = 1.0 + t = time_dim(g) + abox = ABox(src, nothing, vp, space_order) + # This needs layers = nothing as currently setup to automatically + # deffault to disk and buffered/saved functions cannot be used like that in an equation + u = TimeFunction(name="u", grid=g, save=nt, space_order=space_order, layers=nothing) + op = Operator([Eq(forward(u), t+1, subdomain=abox)]) + apply(op, dt=dt) + @test data(u)[:,:,1] ≈ zeros(Float32, 5 , 5) + @test data(u)[2:end-1,2:end-1,2] ≈ ones(Float32, 3, 3) + data(u)[2:end-1,2:end-1,2] .= 0 + @test data(u)[:,:,2] ≈ zeros(Float32, 5 , 5) + @test data(u)[:,:,3] ≈ 2 .* ones(Float32, 5 , 5) + end end -@testset "ABox Intersection Time Function" begin - mid = SubDomain("mid",[("middle",2,2),("middle",0,0)]) - g = Grid(shape=(5,5), extent=(4.0,4.0), subdomains=mid) - nt = 3 - coords = [2. 2. ;] - space_order = 0 - vp = Devito.Function(name="vp", grid=g, space_order=space_order) - src = SparseTimeFunction(name="src", grid=g, nt=nt, npoint=size(coords)[1], coordinates=coords, space_order=0) - data(vp) .= 1.0 - dt = 1.0 - t = time_dim(g) - abox = ABox(src, nothing, vp, space_order) - intbox = Devito.intersection(abox,mid) - # Similar as above, need layers=nothing - u = TimeFunction(name="u", grid=g, save=nt, space_order=space_order, layers=nothing) - op = Operator([Eq(forward(u), t+1, subdomain=intbox)]) - apply(op, dt=dt) - @test data(u)[:,:,1] ≈ zeros(Float32, 5 , 5) - @test data(u)[3,2:4,2] ≈ ones(Float32, 3) - data(u)[3,2:4,2] .= 0 - @test data(u)[:,:,2] ≈ zeros(Float32, 5 , 5) - @test data(u)[3,:,3] ≈ 2 .* ones(Float32, 5) - data(u)[3,:,3] .= 0 - @test data(u)[:,:,3] ≈ zeros(Float32, 5 , 5) +# TODO (9/2/2025)- failing with decoupler, mloubout is looking into the issue +if get(ENV, "DEVITO_DECOUPLER", "0") != "1" + @testset "ABox Intersection Time Function" begin + mid = SubDomain("mid",[("middle",2,2),("middle",0,0)]) + g = Grid(shape=(5,5), extent=(4.0,4.0), subdomains=mid) + nt = 3 + coords = [2. 2. ;] + space_order = 0 + vp = Devito.Function(name="vp", grid=g, space_order=space_order) + src = SparseTimeFunction(name="src", grid=g, nt=nt, npoint=size(coords)[1], coordinates=coords, space_order=0) + data(vp) .= 1.0 + dt = 1.0 + t = time_dim(g) + abox = ABox(src, nothing, vp, space_order) + intbox = Devito.intersection(abox,mid) + # Similar as above, need layers=nothing + u = TimeFunction(name="u", grid=g, save=nt, space_order=space_order, layers=nothing) + op = Operator([Eq(forward(u), t+1, subdomain=intbox)]) + apply(op, dt=dt) + @test data(u)[:,:,1] ≈ zeros(Float32, 5 , 5) + @test data(u)[3,2:4,2] ≈ ones(Float32, 3) + data(u)[3,2:4,2] .= 0 + @test data(u)[:,:,2] ≈ zeros(Float32, 5 , 5) + @test data(u)[3,:,3] ≈ 2 .* ones(Float32, 5) + data(u)[3,:,3] .= 0 + @test data(u)[:,:,3] ≈ zeros(Float32, 5 , 5) + end end @testset "FloatX dtypes with $(mytype), $(DT), $(CT)" for mytype ∈ [Float32, Float64], (nb, DT, CT) in zip([8, 16], [FloatX8, FloatX16], [UInt8, UInt16]) @@ -128,43 +134,48 @@ end @test isapprox(Devito.decompress.(data(f)), Devito.decompress.(data(g))) end -@testset "CCall with printf" begin - # CCall test written to use gcc - @pywith switchconfig(;compiler=get(ENV, "CC", "gcc")) begin - pf = CCall("printf", header="stdio.h") - @test Devito.name(pf) == "printf" - @test Devito.header(pf) == "stdio.h" - printingop = Operator([pf([""" "hello world!" """])]) - ccode(printingop, filename="helloworld.c") - # read the program - code = read("helloworld.c", String) - # check to make sure header is in the program - @test occursin("#include \"stdio.h\"\n", code) - # check to make sure the printf statement is in the program - @test occursin("printf( \"hello world!\" );\n", code) - # test to make sure the operator compiles and runs - @test try apply(printingop) - true - catch - false +devito_arch = get(ENV, "DEVITO_ARCH", "gcc") + +# TODO (9/2/2025) - failing with decoupler, mloubout is looking into the issue +if get(ENV, "DEVITO_DECOUPLER", "0") != "1" + @testset "CCall with printf" begin + # CCall test written to use gcc + carch = devito_arch in ["gcc", "clang"] ? devito_arch : "gcc" + @pywith switchconfig(;compiler=get(ENV, "CC", carch)) begin + pf = CCall("printf", header="stdio.h") + @test Devito.name(pf) == "printf" + @test Devito.header(pf) == "stdio.h" + printingop = Operator([pf([""" "hello world!" """])]) + ccode(printingop, filename="helloworld.c") + # read the program + code = read("helloworld.c", String) + # check to make sure header is in the program + @test occursin("#include \"stdio.h\"\n", code) + # check to make sure the printf statement is in the program + @test occursin("printf( \"hello world!\" );\n", code) + # test to make sure the operator compiles and runs + @test try apply(printingop) + true + catch + false + end + # remove the file + rm("helloworld.c", force=true) end - # remove the file - rm("helloworld.c", force=true) end end # currently only gcc and nvc are useful -devito_arch = get(ENV, "DEVITO_ARCH", "gcc") compression = [] (lowercase(devito_arch) == "nvc") && (push!(compression, "bitcomp")) -(lowercase(devito_arch) == "gcc") && (push!(compression, "cvxcompress")) +(lowercase(devito_arch) in ["gcc", "clang"]) && (push!(compression, "cvxcompress")) @testset "Serialization with compression=$(compression)" for compression in compression @info "testing compression with $(compression)" if compression == "bitcomp" configuration!("compiler", "nvc") else - configuration!("compiler", "gcc") + configuration!("compiler", devito_arch) end nt = 11 diff --git a/test/mpitests_2ranks.jl b/test/mpitests_2ranks.jl index a0b04abd..a65d6909 100644 --- a/test/mpitests_2ranks.jl +++ b/test/mpitests_2ranks.jl @@ -1,6 +1,12 @@ using Devito, MPI, Random, Strided, Test -MPI.Init() +@info MPI.MPIPreferences.abi, MPI.MPIPreferences.binary + +MPIExt = Base.get_extension(Devito, :MPIExt) + +if !MPI.Initialized() + MPI.Init() +end configuration!("log-level", "DEBUG") configuration!("language", "openmp") configuration!("mpi", true) @@ -9,7 +15,7 @@ configuration!("mpi", true) grid = Grid(shape=n, dtype=Float32) b = Devito.Function(name="b", grid=grid, space_order=2) b_data = data_with_halo(b) - @test isa(b_data, Devito.DevitoMPIArray{Float32,length(n)}) + @test isa(b_data, MPIExt.DevitoMPIArray{Float32,length(n)}) if length(n) == 2 @test size(b_data) == (15,14) else @@ -34,7 +40,7 @@ end grid = Grid(shape=n, dtype=Float32) b = Devito.Function(name="b", grid=grid, space_order=2) b_data = data(b) - @test isa(b_data, Devito.DevitoMPIArray{Float32,length(n)}) + @test isa(b_data, MPIExt.DevitoMPIArray{Float32,length(n)}) @test size(b_data) == n b_data .= 3.14f0 @@ -57,7 +63,7 @@ end grid = Grid(shape=n, dtype=Float32) b = Devito.Function(name="b", grid=grid, space_order=2) b_data = data_with_inhalo(b) - @test isa(b_data, Devito.DevitoMPIArray{Float32,length(n)}) + @test isa(b_data, MPIExt.DevitoMPIArray{Float32,length(n)}) _n = length(n) == 2 ? (15,18) : (16,15,18) @@ -94,7 +100,7 @@ end grid = Grid(shape=n, dtype=Float32) b = Devito.Function(name="b", grid=grid, space_order=2) b_data = data_with_halo(b) - @test isa(b_data, Devito.DevitoMPIArray{Float32,length(n)}) + @test isa(b_data, MPIExt.DevitoMPIArray{Float32,length(n)}) _n = length(n) == 2 ? (15,14) : (16,15,14) @@ -133,7 +139,7 @@ end grid = Grid(shape=n, dtype=Float32) b = Devito.Function(name="b", grid=grid, space_order=2) b_data = data(b) - @test isa(b_data, Devito.DevitoMPIArray{Float32,length(n)}) + @test isa(b_data, MPIExt.DevitoMPIArray{Float32,length(n)}) @test size(b_data) == n b_data_test = zeros(Float32, n) if MPI.Comm_rank(MPI.COMM_WORLD) == 0 @@ -262,26 +268,26 @@ end MPI.Barrier(MPI.COMM_WORLD) end -@testset "DevitoMPIArray localsize, n=$n" for n in ((5,4),(6,5,4)) +@testset "DevitoMPIArray MPIExt.localsize, n=$n" for n in ((5,4),(6,5,4)) g = Grid(shape=n) f = Devito.Function(name="f", grid=g) h = Devito.TimeFunction(name="h", grid=g, time_order=2) for func in (f,h) - @test localsize(data(func)) == length.(Devito.localindices(data(func))) + @test MPIExt.localsize(data(func)) == length.(Devito.localindices(data(func))) end end -@testset "DevitoMPISparseArray localsize, n=$n, npoint=$npoint" for n in ((5,4),(6,5,4)), npoint in (1,5,10) +@testset "DevitoMPISparseArray MPIExt.localsize, n=$n, npoint=$npoint" for n in ((5,4),(6,5,4)), npoint in (1,5,10) g = Grid(shape=n) sf = SparseFunction(name="sf", grid=g, npoint=npoint) - @test localsize(data(sf)) == length.(Devito.localindices(data(sf))) + @test MPIExt.localsize(data(sf)) == length.(Devito.localindices(data(sf))) end -@testset "DevitoMPISparseTimeArray localsize, n=$n, npoint=$npoint" for n in ((5,4),(6,5,4)), npoint in (1,5,10) +@testset "DevitoMPISparseTimeArray MPIExt.localsize, n=$n, npoint=$npoint" for n in ((5,4),(6,5,4)), npoint in (1,5,10) g = Grid(shape=n) nt = 11 stf = SparseTimeFunction(name="stf", grid=g, nt=11, npoint=npoint) - @test localsize(data(stf)) == length.(Devito.localindices(data(stf))) + @test MPIExt.localsize(data(stf)) == length.(Devito.localindices(data(stf))) end @testset "DevitoMPITimeArray, copy!, data, halo, n=$n" for n in ( (11,10), (12,11,10)) @@ -691,7 +697,7 @@ end grid = Grid(shape=n, dtype=Float32) sf = SparseFunction(name="sf", npoint=npoint, grid=grid) sf_coords = coordinates_data(sf) - @test isa(sf_coords, Devito.DevitoMPIArray) + @test isa(sf_coords, MPIExt.DevitoMPIArray) @test size(sf_coords) == (length(n),npoint) x = reshape(Float32[1:length(n)*npoint;], length(n), npoint) @@ -728,7 +734,7 @@ end grid = Grid(shape=n, dtype=Float32) stf = SparseTimeFunction(name="stf", npoint=npoint, nt=100, grid=grid) stf_coords = coordinates_data(stf) - @test isa(stf_coords, Devito.DevitoMPIArray) + @test isa(stf_coords, MPIExt.DevitoMPIArray) @test size(stf_coords) == (length(n),npoint) x = reshape(Float32[1:length(n)*npoint;], length(n), npoint) @@ -854,7 +860,7 @@ end end MPI.Barrier(MPI.COMM_WORLD) _x = data(sf) - @test isa(data(sf), Devito.DevitoMPISparseArray) + @test isa(data(sf), MPIExt.DevitoMPISparseArray) copy!(_x, x) x .= Float32[1:npoint;] @@ -876,7 +882,7 @@ end end MPI.Barrier(MPI.COMM_WORLD) _x = data(stf) - @test isa(data(stf), Devito.DevitoMPISparseTimeArray) + @test isa(data(stf), MPIExt.DevitoMPISparseTimeArray) copy!(_x, x) x .= reshape(Float32[1:prod(nt*npoint);], npoint, nt) diff --git a/test/mpitests_4ranks.jl b/test/mpitests_4ranks.jl index ab97355c..24a74c24 100644 --- a/test/mpitests_4ranks.jl +++ b/test/mpitests_4ranks.jl @@ -1,5 +1,13 @@ using Devito, MPI, Random, Strided, Test +@info MPI.MPIPreferences.abi, MPI.MPIPreferences.binary + +MPIExt = Base.get_extension(Devito, :MPIExt) + +if !MPI.Initialized() + MPI.Init() +end + MPI.Init() configuration!("log-level", "DEBUG") configuration!("language", "openmp") @@ -9,7 +17,7 @@ configuration!("mpi", true) grid = Grid(shape=n, dtype=Float32) b = Devito.Function(name="b", grid=grid, space_order=2) b_data = data(b) - @test isa(b_data, Devito.DevitoMPIArray{Float32,length(n)}) + @test isa(b_data, MPIExt.DevitoMPIArray{Float32,length(n)}) @test size(b_data) == n b_data_test = zeros(Float32, n) if MPI.Comm_rank(MPI.COMM_WORLD) == 0 @@ -66,7 +74,7 @@ configuration!("mpi", true) end end -@testset "Convert data from rank 0 to DevitoMPIArray then back, no halo, n=$n" for n in ( (11,10), (12,11,10) ) +@test_skip @testset "Convert data from rank 0 to DevitoMPIArray then back, no halo, n=$n" for n in ( (11,10), (12,11,10) ) grid = Grid(shape=n, dtype=Float32) b = Devito.Function(name="b", grid=grid, space_order=2) b_data = data(b) @@ -514,7 +522,7 @@ end end MPI.Barrier(MPI.COMM_WORLD) _x = data(sf) - @test isa(data(sf), Devito.DevitoMPISparseArray) + @test isa(data(sf), MPIExt.DevitoMPISparseArray) copy!(_x, x) x .= Float32[1:npoint;] @@ -536,7 +544,7 @@ end end MPI.Barrier(MPI.COMM_WORLD) _x = data(stf) - @test isa(data(stf), Devito.DevitoMPISparseTimeArray) + @test isa(data(stf), MPIExt.DevitoMPISparseTimeArray) copy!(_x, x) x .= reshape(Float32[1:prod(nt*npoint);], npoint, nt) @@ -573,7 +581,7 @@ end end end -@testset "MPI Getindex for TimeFunction n=$n" for n in ( (11,10), (5,4), (7,2), (4,5,6), (2,3,4) ) +@test_skip @testset "MPI Getindex for TimeFunction n=$n" for n in ( (11,10), (5,4), (7,2), (4,5,6), (2,3,4) ) N = length(n) nt = 5 rnk = MPI.Comm_rank(MPI.COMM_WORLD) diff --git a/test/runtests.jl b/test/runtests.jl index d484d94f..56125733 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,16 +1,37 @@ -using Devito +using Devito, MPI, MPIPreferences + +@info """ +When running via the `Pkg.test` method, the MPI implementation is set via the test/LocalPreferences.toml file. +MPIPreferences.binary=$(MPIPreferences.binary). + +To change to a different implementation do (for example): + + cd(DEPOT_PATH[1] * "/dev/Devito) + ]activate . + using MPIPreferences + MPIPreferences.use_jll_binary("MPICH_jll") +""" for testscript in ("serialtests.jl", "gencodetests.jl", "csymbolicstests.jl") include(testscript) end -# JKW: disabling mpi tests for now, we expect to remove MPI features from Devito.jl in future PR -# run(`$(mpiexec()) -n 2 julia --code-coverage mpitests_2ranks.jl`) -# run(`$(mpiexec()) -n 4 julia --code-coverage mpitests_4ranks.jl`) - +# Only run devitopro tests if devitopro is available if Devito.has_devitopro() @info "running devito pro tests" + include("devitoprotests.jl") + @info "running pro tests with the decoupler" + withenv("DEVITO_DECOUPLER"=>"1", "DEVITO_DECOUPLER_WORKERS"=>"2", "MPI4PY_RC_RECV_MPROBE"=>"0") do + run(`$(mpiexec()) -n 1 julia --code-coverage devitoprotests.jl`) + end else @info "not running devito pro tests" end + + +@info "mpi tests with DEVITO_AUTOPADDING=0" +withenv("DEVITO_AUTOPADDING" => "0") do + run(`$(mpiexec()) -n 2 julia --code-coverage mpitests_2ranks.jl`) + run(`$(mpiexec()) -n 4 julia --code-coverage mpitests_4ranks.jl`) +end diff --git a/test/serialtests.jl b/test/serialtests.jl index d6617db6..4416e44b 100644 --- a/test/serialtests.jl +++ b/test/serialtests.jl @@ -6,7 +6,7 @@ configuration!("language", "openmp") configuration!("mpi", false) # you need to use when testing locally due to the Libdl startup issue for the nv compiler -configuration!("compiler", get(ENV, "CC", "gcc")) +configuration!("compiler", get(ENV, "CC", get(ENV, "DEVITO_ARCH", "gcc"))) configuration!("platform", "cpu64") @testset "configuration" begin