diff --git a/examples/Project.toml b/examples/Project.toml index 3671d5c..6272520 100644 --- a/examples/Project.toml +++ b/examples/Project.toml @@ -1,10 +1,13 @@ [deps] CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" KrylovPreconditioners = "45d422c2-293f-44ce-8315-2cb988662dec" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" NewtonKrylov = "0be81120-40bf-4f8b-adf0-26103efb66f1" +OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" -[sources] -NewtonKrylov = {path = ".."} +[sources.NewtonKrylov] +path = ".." diff --git a/examples/bratu_mpi.jl b/examples/bratu_mpi.jl new file mode 100644 index 0000000..5c9e02f --- /dev/null +++ b/examples/bratu_mpi.jl @@ -0,0 +1,208 @@ +# # 1D bratu equation from (Kan2022-ko)[@cite] + +# # Run with `JULIAUP_CHANNEL="1.10" ./mpiexecjl --project=. -np 2 julia bratu_mpi.jl` + +# ## Necessary packages +using NewtonKrylov, Krylov +using KrylovPreconditioners +using SparseArrays, LinearAlgebra +using OffsetArrays +using MPI + +struct LocalData{FC, D} <: AbstractVector{FC} + data::D + + function LocalData(data::D) where {D} + FC = eltype(data) + return new{FC, D}(data) + end +end + +function LocalData{FC, D}(::UndefInitializer, l::Int64) where {FC, D} + return LocalData(D(undef, l + 2)) +end + +Base.length(v::LocalData) = return length(v.data) - 2 +Base.size(v::LocalData) = return (length(v),) + +Base.@propagate_inbounds function Base.getindex(v::LocalData, idx) + return getindex(v.data, idx + 1) +end + +Base.@propagate_inbounds function Base.setindex!(v::LocalData, x, idx) + return setindex!(v.data, x, idx + 1) +end + +Base.similar(v::LocalData) = LocalData(similar(v.data)) +Base.copy(v::LocalData) = LocalData(copy(v.data)) +Base.zero(v::LocalData) = LocalData(zero(v.data)) + +function Base.cconvert(::Type{MPI.MPIPtr}, buf::LocalData) + return view(buf.data, 2:(length(buf) - 1)) +end +function MPI.Buffer(v::LocalData) + return MPI.Buffer(v, Cint(length(v)), MPI.Datatype(eltype(v))) +end + +using Krylov +import Krylov.FloatOrComplex + +function Krylov.kdot(n::Integer, x::LocalData{T}, y::LocalData{T}) where {T <: FloatOrComplex} + return @views MPI.Allreduce( + dot(x.data[2:(length(x) + 1)], y.data[2:(length(y) + 1)]), + +, MPI.COMM_WORLD + ) +end + +function Krylov.knorm(n::Integer, x::LocalData{T}) where {T <: FloatOrComplex} + return @views sqrt(MPI.Allreduce(sum(abs2, x.data[2:(length(x) + 1)]), +, MPI.COMM_WORLD)) +end + +# We are going to split a domain of size N +# into equal chunks. + +function localdomain(N, nranks, myrank) + n = cld(N, nranks) + first = (n * myrank) + 1 + last = min(n * (myrank + 1), N) + return first:last +end + + +# ## 1D Bratu equations +# $y′′ + λ * exp(y) = 0$ + +function update!(y::LocalData) + N = length(y) + # Set boundary conditions + nranks = MPI.Comm_size(MPI.COMM_WORLD) + myid = MPI.Comm_rank(MPI.COMM_WORLD) + + ## BVP boundary condition + if myid == 0 + y[0] = 0 + end + if myid == (nranks - 1) + y[N + 1] = 0 + end + + ## domain splitting + # reqs = MPI.MultiRequest(4) Enzyme strugles with this + # reqs = [MPI.Request() for _ in 1:4] + # if myid != (nranks - 1) + # # Send & data to the right + # MPI.Isend(view(y.data, N + 1), MPI.COMM_WORLD, reqs[1]; dest = myid + 1) + # MPI.Irecv!(view(y.data, N + 2), MPI.COMM_WORLD, reqs[2]; source = myid + 1) + # end + # if myid != 0 + # # Send data to the left + # MPI.Isend(view(y.data, 2), MPI.COMM_WORLD, reqs[3]; dest = myid - 1) + # MPI.Irecv!(view(y.data, 1), MPI.COMM_WORLD, reqs[4]; source = myid - 1) + # end + # MPI.Waitall(reqs) + + if myid != (nranks - 1) + # Send & data to the right + MPI.Send(view(y.data, N + 1), MPI.COMM_WORLD; dest = myid + 1) + MPI.Recv!(view(y.data, N + 2), MPI.COMM_WORLD; source = myid + 1) + end + if myid != 0 + # Send data to the left + MPI.Recv!(view(y.data, 1), MPI.COMM_WORLD; source = myid - 1) + MPI.Send(view(y.data, 2), MPI.COMM_WORLD; dest = myid - 1) + end + + return nothing +end + +function bratu!(res::LocalData, y::LocalData, Δx, λ) + update!(y) + # Must be part of the equation since otherwise the krylov solver + # Will not update the boundary conditions for the inner iterations + N = length(y) + ## Calculate residual + for i in 1:N + y_l = y[i - 1] + y_r = y[i + 1] + y′′ = (y_r - 2y[i] + y_l) / Δx^2 + + res[i] = y′′ + λ * exp(y[i]) # = 0 + end + return nothing +end + +function bratu(y, dx, λ) + res = similar(y) + bratu!(res, y, dx, λ) + return res +end + +# ## Reference solution +function true_sol_bratu(x) + ## for λ = 3.51382, 2nd sol θ = 4.8057 + θ = 4.79173 + return -2 * log(cosh(θ * (x - 0.5) / 2) / (cosh(θ / 4))) +end + +# # Setup +MPI.Init() + +const nranks = MPI.Comm_size(MPI.COMM_WORLD) +const myid = MPI.Comm_rank(MPI.COMM_WORLD) + +# ## Choice of parameters +const N = 10_000 +const λ = 3.51382 +const dx = 1 / (N + 1) # Grid-spacing + +# ### Domain and Inital condition +LI = localdomain(N, nranks, myid) +const l_N = length(LI) +u₀ = Vector{Float64}(undef, l_N + 2) + +X = LinRange(0.0 + dx, 1.0 - dx, N) # Global +x = view(X, LI) + +u₀ = LocalData(u₀) +u₀ .= sin.(x .* π) +update!(u₀) + +# JOp = NewtonKrylov.JacobianOperator((res, u) -> bratu!(res, u, dx, λ), similar(u₀), u₀) +# J = collect(JOp) +# display(J) + +U₀ = MPI.Gather(u₀, MPI.COMM_WORLD) + +# ## Solving using inplace variant and CG +uₖ, stats = newton_krylov!( + (res, u) -> bratu!(res, u, dx, λ), + copy(u₀), similar(u₀); + Solver = CgSolver, + # update! = (u) -> update!(u), + norm = v -> Krylov.knorm(length(v), v), + verbose = myid == 0 ? 1 : 0 +) +@show stats + +Uₖ = MPI.Gather(uₖ, MPI.COMM_WORLD) + +if myid == 0 + using CairoMakie + + reference = true_sol_bratu.(X) + ϵ = abs2.(Uₖ .- reference) + + fig = Figure(size = (800, 800)) + ax = Axis(fig[1, 1], title = "", ylabel = "", xlabel = "") + + lines!(ax, X, reference, label = "True solution") + lines!(ax, X, U₀, label = "Initial guess") + lines!(ax, X, Uₖ, label = "Newton-Krylov solution") + + axislegend(ax, position = :cb) + + ax = Axis(fig[1, 2], title = "Error", ylabel = "abs2 err", xlabel = "") + lines!(ax, X, ϵ) + + save("bratu_mpi_n$(nranks).png", fig) +end diff --git a/examples/mpiexecjl b/examples/mpiexecjl new file mode 100755 index 0000000..137c9b4 --- /dev/null +++ b/examples/mpiexecjl @@ -0,0 +1,76 @@ +#!/bin/sh +# +# Copyright (C) 2020 Simon Byrne, Mosè Giordano +# License is MIT "Expat" +# +### Commentary: +# +# Command line utility to call the `mpiexec` binary used by the `MPI.jl` version +# in the given Julia project. It has the same syntax as the `mpiexec` binary +# that would be called, with the additional `--project=...` flag to select a +# different Julia project. +# +# Examples of usage (the MPI flags available depend on the MPI implementation +# called): +# +# $ mpiexecjl --version +# $ mpiexecjl -n 40 julia mpi-script.jl +# $ mpiexecjl --project=my_experiment -n 80 --oversubscribe julia mpi-script.jl +# +### Code: + +usage () { + echo "Usage: ${0} [--project=...] MPIEXEC_ARGUMENTS..." + echo "Call the mpiexec binary in the Julia environment specified by the --project option." + echo "If no project is specified, the MPI associated with the global Julia environment will be used." + echo "All other arguments are forwarded to mpiexec." +} + +for arg; do + shift + case "${arg}" in + --project | --project=*) + PROJECT_ARG="${arg}" + ;; + -h | --help) + usage + echo "Below is the help of the current mpiexec." + echo + set -- "${@}" "${arg}" + ;; + *) + set -- "${@}" "${arg}" + ;; + esac +done + +if [ -z "${*}" ]; then + echo "ERROR: no arguments specified." 1>&2 + echo + usage + exit 1 +fi + +if [ -n "${JULIA_BINDIR}" ]; then + JULIA_CMD="${JULIA_BINDIR}/julia" +else + JULIA_CMD="julia" +fi + +# shellcheck disable=SC2016 +SCRIPT=' +using MPI +ENV["JULIA_PROJECT"] = dirname(Base.active_project()) +proc = run(pipeline(`$(mpiexec()) $(ARGS)`; stdout, stderr); wait=false) +wait(proc) +if !iszero(proc.exitcode) + @error "The MPI process failed" proc +end +exit(proc.exitcode) +' + +if [ -n "${PROJECT_ARG}" ]; then + "${JULIA_CMD}" "${PROJECT_ARG}" --color=yes --startup-file=no -q --compile=min -O0 -e "${SCRIPT}" -- "${@}" +else + "${JULIA_CMD}" --color=yes --startup-file=no -q --compile=min -O0 -e "${SCRIPT}" -- "${@}" +fi diff --git a/examples/simple.jl b/examples/simple.jl index 5347cab..05d862f 100644 --- a/examples/simple.jl +++ b/examples/simple.jl @@ -48,3 +48,8 @@ end lines!(ax, trace_3) fig + +# ## Extract Jacobian + +JOp = NewtonKrylov.JacobianOperator(F!, zeros(2), [3.0, 4.0]) +J = collect(JOp) diff --git a/src/NewtonKrylov.jl b/src/NewtonKrylov.jl index db993aa..c999f4c 100644 --- a/src/NewtonKrylov.jl +++ b/src/NewtonKrylov.jl @@ -19,7 +19,9 @@ function maybe_duplicated(f, df) end end -struct JacobianOperator{F, A} +abstract type AbstractJacobianOperator end + +struct JacobianOperator{F, A} <: AbstractJacobianOperator f::F # F!(res, u) f_cache::F res::A @@ -65,14 +67,14 @@ function mul!(out, J′::Union{Adjoint{<:Any, <:JacobianOperator}, Transpose{<:A return nothing end -function Base.collect(JOp::JacobianOperator) +function Base.collect(JOp::AbstractJacobianOperator) N, M = size(JOp) - v = zeros(eltype(JOp), M) - out = zeros(eltype(JOp), N) + v = zero(JOp.u) + out = zero(JOp.res) J = SparseMatrixCSC{eltype(v), Int}(undef, size(JOp)...) for j in 1:M - out .= 0.0 - v .= 0.0 + out .= 0 + v .= 0 v[j] = 1.0 mul!(out, JOp, v) for i in 1:N @@ -84,6 +86,29 @@ function Base.collect(JOp::JacobianOperator) return J end +struct FDJacobianOperator{F, A} <: AbstractJacobianOperator + f::F # F!(res, u) + res::A + u::A + function FDJacobianOperator(f::F, res, u) where {F} + return new{F, typeof(u)}(f, res, u) + end +end + +Base.size(J::FDJacobianOperator) = (length(J.res), length(J.u)) +Base.eltype(J::FDJacobianOperator) = eltype(J.u) + +function mul!(out, J::FDJacobianOperator, v) + ϵ = cbrt(eps()/2) + # (F(u + ϵ .* v) - F(u - ϵ .* v)) ./ 2ϵ + + J.f(J.res, J.u .+ ϵ .* v) + out .= J.res + J.f(J.res, J.u .- ϵ .* v) + out .= (out .- J.res) ./ 2ϵ + return nothing +end + ## # Newton-Krylov ## @@ -167,8 +192,13 @@ function newton_krylov!( N = nothing, krylov_kwargs = (;), callback = (args...) -> nothing, + norm = norm, + update! = (u) -> nothing, + Operator = JacobianOperator, ) t₀ = time_ns() + + update!(u) # Impose BC F!(res, u) # res = F(u) n_res = norm(res) callback(u, res, n_res) @@ -181,8 +211,9 @@ function newton_krylov!( verbose > 0 && @info "Jacobian-Free Newton-Krylov" Solver res₀ = n_res tol tol_rel tol_abs η - J = JacobianOperator(F!, res, u) + J = Operator(F!, res, u) solver = Solver(J, res) + b = similar(res) stats = Stats(0, 0) while n_res > tol && stats.outer_iterations <= max_niter @@ -201,8 +232,8 @@ function newton_krylov!( # Solve: Jx = -res # res is modifyed by J, so we create a copy `-res` - # TODO: provide a temporary storage for `-res` - solve!(solver, J, -res; kwargs...) + b .= .- res + solve!(solver, J, b; kwargs...) d = solver.x # Newton direction s = 1 # Newton step TODO: LineSearch @@ -213,6 +244,7 @@ function newton_krylov!( # Update residual and norm n_res_prior = n_res + update!(u) # Impose BC F!(res, u) # res = F(u) n_res = norm(res) callback(u, res, n_res) @@ -226,7 +258,7 @@ function newton_krylov!( η = forcing(η, tol, n_res, n_res_prior) end - verbose > 0 && @info "Newton" iter = n_res η stats + verbose > 0 && @info "Newton" resᵢ = n_res η stats stats = update(stats, solver.stats.niter) end t = (time_ns() - t₀) / 1.0e9