From dc2dd3bc0b05530dc38618634b6bbb3ffb4d5d7c Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 2 Dec 2024 10:56:09 +0100 Subject: [PATCH 1/7] add WIP for MPI example --- examples/Project.toml | 7 ++- examples/bratu_mpi.jl | 111 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 2 deletions(-) create mode 100644 examples/bratu_mpi.jl diff --git a/examples/Project.toml b/examples/Project.toml index 3671d5c..6272520 100644 --- a/examples/Project.toml +++ b/examples/Project.toml @@ -1,10 +1,13 @@ [deps] CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" KrylovPreconditioners = "45d422c2-293f-44ce-8315-2cb988662dec" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" NewtonKrylov = "0be81120-40bf-4f8b-adf0-26103efb66f1" +OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" -[sources] -NewtonKrylov = {path = ".."} +[sources.NewtonKrylov] +path = ".." diff --git a/examples/bratu_mpi.jl b/examples/bratu_mpi.jl new file mode 100644 index 0000000..01fcd17 --- /dev/null +++ b/examples/bratu_mpi.jl @@ -0,0 +1,111 @@ +# # 1D bratu equation from (Kan2022-ko)[@cite] + +# ## Necessary packages +using NewtonKrylov, Krylov +using KrylovPreconditioners +using SparseArrays, LinearAlgebra +using OffsetArrays +using MPI + +using EnzymeCore +import EnzymeCore.EnzymeRules: forward +using EnzymeCore.EnzymeRules + +function forward(::FwdConfig, func::Const{MPI.Request}, ::Type{<:Duplicated}) + # Enzyme: unhandled forward for jl_f_finalizer + return Duplicated(MPI.Request(), MPI.Request()) +end + +MPI.Init() + +nranks = MPI.Comm_size(MPI.COMM_WORLD) +myid = MPI.Comm_rank(MPI.COMM_WORLD) + +# We are going to split a domain of size N +# into equal chunks. + +function localdomain(N, nranks, myrank) + n = cld(N, nranks) + first = (n * myrank) + 1 + last = min(n * (myrank + 1), N) + first:last +end + +# ## Choice of parameters +const N = 10_000 +const λ = 3.51382 +const dx = 1 / (N + 1) # Grid-spacing + +# ### Domain and Inital condition +LI = localdomain(N, 1, 0) # TODO + +x = LinRange(0.0, 1.0, N+2) # Global +u₀ = sin.(x .* π) + +# ## 1D Bratu equations +# $y′′ + λ * exp(y) = 0$ + + + +function bratu!(_res, _y, Δx, λ, N) + res = OffsetArray(_res, 0:(N+1)) + y = OffsetArray(_y, 0:(N+1)) + + # Set boundary conditions + nranks = MPI.Comm_size(MPI.COMM_WORLD) + myid = MPI.Comm_rank(MPI.COMM_WORLD) + + ## BVP boundary condition + if myid == 0 + y[0] = 0 + end + if myid == (nranks - 1) + y[N+1] = 0 + end + + ## domain splitting + # reqs = MPI.MultiRequest(4) Enzyme strugles with this + reqs = [MPI.Request() for _ in 1:4] + if myid != (nranks - 1) + # Send & data to the right + MPI.Isend(view(y, N), MPI.COMM_WORLD, reqs[1]; dest = myid + 1) + MPI.Irecv!(view(y, N+1), MPI.COMM_WORLD, reqs[2]; src = myid + 1) + end + if myid != 0 + # Send data to the left + MPI.Isend(view(y, 1), MPI.COMM_WORLD, reqs[3]; dest = myid - 1) + MPI.Irecv!(view(y, 0), MPI.COMM_WORLD, reqs[4]; src = myid - 1) + end + MPI.Waitall(reqs) + + ## Calculate residual + for i in 1:N + y_l = y[i - 1] + y_r = y[i + 1] + y′′ = (y_r - 2y[i] + y_l) / Δx^2 + + res[i] = y′′ + λ * exp(y[i]) # = 0 + end + return nothing +end + +function bratu(y, dx, λ, N) + res = similar(y) + bratu!(res, y, dx, λ, N) + return res +end + +# ## Reference solution +function true_sol_bratu(x) + ## for λ = 3.51382, 2nd sol θ = 4.8057 + θ = 4.79173 + return -2 * log(cosh(θ * (x - 0.5) / 2) / (cosh(θ / 4))) +end + +# ## Solving using inplace variant and CG +uₖ, _ = newton_krylov!( + (res, u) -> bratu!(res, u, dx, λ, N), + copy(u₀), similar(u₀); + Solver = CgSolver, +) + From fae43ebe72731d6cdcb524342331aede3f6654dd Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 2 Dec 2024 14:35:50 +0100 Subject: [PATCH 2/7] Update BC outside krylov --- examples/bratu_mpi.jl | 28 ++++++++++++---------------- src/NewtonKrylov.jl | 5 +++++ 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/examples/bratu_mpi.jl b/examples/bratu_mpi.jl index 01fcd17..765fe53 100644 --- a/examples/bratu_mpi.jl +++ b/examples/bratu_mpi.jl @@ -7,15 +7,6 @@ using SparseArrays, LinearAlgebra using OffsetArrays using MPI -using EnzymeCore -import EnzymeCore.EnzymeRules: forward -using EnzymeCore.EnzymeRules - -function forward(::FwdConfig, func::Const{MPI.Request}, ::Type{<:Duplicated}) - # Enzyme: unhandled forward for jl_f_finalizer - return Duplicated(MPI.Request(), MPI.Request()) -end - MPI.Init() nranks = MPI.Comm_size(MPI.COMM_WORLD) @@ -45,16 +36,13 @@ u₀ = sin.(x .* π) # ## 1D Bratu equations # $y′′ + λ * exp(y) = 0$ - - -function bratu!(_res, _y, Δx, λ, N) - res = OffsetArray(_res, 0:(N+1)) +function update!(_y, N) y = OffsetArray(_y, 0:(N+1)) # Set boundary conditions nranks = MPI.Comm_size(MPI.COMM_WORLD) myid = MPI.Comm_rank(MPI.COMM_WORLD) - + ## BVP boundary condition if myid == 0 y[0] = 0 @@ -69,14 +57,20 @@ function bratu!(_res, _y, Δx, λ, N) if myid != (nranks - 1) # Send & data to the right MPI.Isend(view(y, N), MPI.COMM_WORLD, reqs[1]; dest = myid + 1) - MPI.Irecv!(view(y, N+1), MPI.COMM_WORLD, reqs[2]; src = myid + 1) + MPI.Irecv!(view(y, N+1), MPI.COMM_WORLD, reqs[2]; source = myid + 1) end if myid != 0 # Send data to the left MPI.Isend(view(y, 1), MPI.COMM_WORLD, reqs[3]; dest = myid - 1) - MPI.Irecv!(view(y, 0), MPI.COMM_WORLD, reqs[4]; src = myid - 1) + MPI.Irecv!(view(y, 0), MPI.COMM_WORLD, reqs[4]; source = myid - 1) end MPI.Waitall(reqs) + return nothing +end + +function bratu!(_res, _y, Δx, λ, N) + res = OffsetArray(_res, 0:(N+1)) + y = OffsetArray(_y, 0:(N+1)) ## Calculate residual for i in 1:N @@ -107,5 +101,7 @@ uₖ, _ = newton_krylov!( (res, u) -> bratu!(res, u, dx, λ, N), copy(u₀), similar(u₀); Solver = CgSolver, + update! = (u)->update!(u, N), + norm = u->sqrt(MPI.Allreduce(sum(abs2, u), +, MPI.COMM_WORLD)) ) diff --git a/src/NewtonKrylov.jl b/src/NewtonKrylov.jl index db993aa..66ecdba 100644 --- a/src/NewtonKrylov.jl +++ b/src/NewtonKrylov.jl @@ -167,8 +167,12 @@ function newton_krylov!( N = nothing, krylov_kwargs = (;), callback = (args...) -> nothing, + norm = norm, + update! = (u) -> nothing ) t₀ = time_ns() + + update!(u) # Impose BC F!(res, u) # res = F(u) n_res = norm(res) callback(u, res, n_res) @@ -213,6 +217,7 @@ function newton_krylov!( # Update residual and norm n_res_prior = n_res + update!(u) # Impose BC F!(res, u) # res = F(u) n_res = norm(res) callback(u, res, n_res) From ec4d70e9ce24c2cd7d8364c28b931b1b6b21a6e0 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 2 Dec 2024 16:25:18 +0100 Subject: [PATCH 3/7] save not working model --- examples/bratu_mpi.jl | 64 +++++++++++++++++++++++++++--------- examples/mpiexecjl | 76 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 16 deletions(-) create mode 100755 examples/mpiexecjl diff --git a/examples/bratu_mpi.jl b/examples/bratu_mpi.jl index 765fe53..a635f51 100644 --- a/examples/bratu_mpi.jl +++ b/examples/bratu_mpi.jl @@ -1,5 +1,7 @@ # # 1D bratu equation from (Kan2022-ko)[@cite] +# # Run with `JULIAUP_CHANNEL="1.10" ./mpiexecjl --project=. -np 2 julia bratu_mpi.jl` + # ## Necessary packages using NewtonKrylov, Krylov using KrylovPreconditioners @@ -9,8 +11,8 @@ using MPI MPI.Init() -nranks = MPI.Comm_size(MPI.COMM_WORLD) -myid = MPI.Comm_rank(MPI.COMM_WORLD) +const nranks = MPI.Comm_size(MPI.COMM_WORLD) +const myid = MPI.Comm_rank(MPI.COMM_WORLD) # We are going to split a domain of size N # into equal chunks. @@ -19,7 +21,7 @@ function localdomain(N, nranks, myrank) n = cld(N, nranks) first = (n * myrank) + 1 last = min(n * (myrank + 1), N) - first:last + return first:last end # ## Choice of parameters @@ -28,27 +30,33 @@ const λ = 3.51382 const dx = 1 / (N + 1) # Grid-spacing # ### Domain and Inital condition -LI = localdomain(N, 1, 0) # TODO +LI = localdomain(N, nranks, myid) +const l_N = length(LI) +@show l_N +u₀ = Vector{Float64}(undef, l_N + 2) + +X = LinRange(0.0, 1.0, N) # Global +x = view(X, LI) -x = LinRange(0.0, 1.0, N+2) # Global -u₀ = sin.(x .* π) +u₀[2:(l_N + 1)] = sin.(x .* π) +U₀ = MPI.Gather(view(u₀, 2:(l_N + 1)), MPI.COMM_WORLD) # ## 1D Bratu equations # $y′′ + λ * exp(y) = 0$ function update!(_y, N) - y = OffsetArray(_y, 0:(N+1)) + y = OffsetArray(_y, 0:(N + 1)) # Set boundary conditions nranks = MPI.Comm_size(MPI.COMM_WORLD) - myid = MPI.Comm_rank(MPI.COMM_WORLD) - + myid = MPI.Comm_rank(MPI.COMM_WORLD) + ## BVP boundary condition if myid == 0 y[0] = 0 end if myid == (nranks - 1) - y[N+1] = 0 + y[N + 1] = 0 end ## domain splitting @@ -57,7 +65,7 @@ function update!(_y, N) if myid != (nranks - 1) # Send & data to the right MPI.Isend(view(y, N), MPI.COMM_WORLD, reqs[1]; dest = myid + 1) - MPI.Irecv!(view(y, N+1), MPI.COMM_WORLD, reqs[2]; source = myid + 1) + MPI.Irecv!(view(y, N + 1), MPI.COMM_WORLD, reqs[2]; source = myid + 1) end if myid != 0 # Send data to the left @@ -69,8 +77,8 @@ function update!(_y, N) end function bratu!(_res, _y, Δx, λ, N) - res = OffsetArray(_res, 0:(N+1)) - y = OffsetArray(_y, 0:(N+1)) + res = OffsetArray(_res, 0:(N + 1)) + y = OffsetArray(_y, 0:(N + 1)) ## Calculate residual for i in 1:N @@ -96,12 +104,36 @@ function true_sol_bratu(x) return -2 * log(cosh(θ * (x - 0.5) / 2) / (cosh(θ / 4))) end +u₀ = OffsetArray(u₀, 0:(l_N + 1)) +update!(u₀, l_N) + # ## Solving using inplace variant and CG uₖ, _ = newton_krylov!( - (res, u) -> bratu!(res, u, dx, λ, N), + (res, u) -> bratu!(res, u, dx, λ, l_N), copy(u₀), similar(u₀); Solver = CgSolver, - update! = (u)->update!(u, N), - norm = u->sqrt(MPI.Allreduce(sum(abs2, u), +, MPI.COMM_WORLD)) + update! = (u) -> update!(u, l_N), + norm = u -> sqrt(MPI.Allreduce(sum(abs2, u), +, MPI.COMM_WORLD)) ) +Uₖ = MPI.Gather(view(uₖ, 2:(l_N + 1)), MPI.COMM_WORLD) +if myid == 0 + using CairoMakie + + reference = true_sol_bratu.(X) + ϵ = abs2.(Uₖ .- reference) + + fig = Figure(size = (800, 800)) + ax = Axis(fig[1, 1], title = "", ylabel = "", xlabel = "") + + lines!(ax, X, reference, label = "True solution") + lines!(ax, X, U₀, label = "Initial guess") + lines!(ax, X, Uₖ, label = "Newton-Krylov solution") + + axislegend(ax, position = :cb) + + ax = Axis(fig[1, 2], title = "Error", ylabel = "abs2 err", xlabel = "") + lines!(ax, ϵ) + + save("bratu_mpi.png", fig) +end diff --git a/examples/mpiexecjl b/examples/mpiexecjl new file mode 100755 index 0000000..137c9b4 --- /dev/null +++ b/examples/mpiexecjl @@ -0,0 +1,76 @@ +#!/bin/sh +# +# Copyright (C) 2020 Simon Byrne, Mosè Giordano +# License is MIT "Expat" +# +### Commentary: +# +# Command line utility to call the `mpiexec` binary used by the `MPI.jl` version +# in the given Julia project. It has the same syntax as the `mpiexec` binary +# that would be called, with the additional `--project=...` flag to select a +# different Julia project. +# +# Examples of usage (the MPI flags available depend on the MPI implementation +# called): +# +# $ mpiexecjl --version +# $ mpiexecjl -n 40 julia mpi-script.jl +# $ mpiexecjl --project=my_experiment -n 80 --oversubscribe julia mpi-script.jl +# +### Code: + +usage () { + echo "Usage: ${0} [--project=...] MPIEXEC_ARGUMENTS..." + echo "Call the mpiexec binary in the Julia environment specified by the --project option." + echo "If no project is specified, the MPI associated with the global Julia environment will be used." + echo "All other arguments are forwarded to mpiexec." +} + +for arg; do + shift + case "${arg}" in + --project | --project=*) + PROJECT_ARG="${arg}" + ;; + -h | --help) + usage + echo "Below is the help of the current mpiexec." + echo + set -- "${@}" "${arg}" + ;; + *) + set -- "${@}" "${arg}" + ;; + esac +done + +if [ -z "${*}" ]; then + echo "ERROR: no arguments specified." 1>&2 + echo + usage + exit 1 +fi + +if [ -n "${JULIA_BINDIR}" ]; then + JULIA_CMD="${JULIA_BINDIR}/julia" +else + JULIA_CMD="julia" +fi + +# shellcheck disable=SC2016 +SCRIPT=' +using MPI +ENV["JULIA_PROJECT"] = dirname(Base.active_project()) +proc = run(pipeline(`$(mpiexec()) $(ARGS)`; stdout, stderr); wait=false) +wait(proc) +if !iszero(proc.exitcode) + @error "The MPI process failed" proc +end +exit(proc.exitcode) +' + +if [ -n "${PROJECT_ARG}" ]; then + "${JULIA_CMD}" "${PROJECT_ARG}" --color=yes --startup-file=no -q --compile=min -O0 -e "${SCRIPT}" -- "${@}" +else + "${JULIA_CMD}" --color=yes --startup-file=no -q --compile=min -O0 -e "${SCRIPT}" -- "${@}" +fi From af4767a3f3219a6c7e9bd115e626a86d0f1836bf Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 3 Dec 2024 10:46:18 +0100 Subject: [PATCH 4/7] use a custom array --- examples/bratu_mpi.jl | 112 ++++++++++++++++++++++++++++++++---------- src/NewtonKrylov.jl | 6 ++- 2 files changed, 91 insertions(+), 27 deletions(-) diff --git a/examples/bratu_mpi.jl b/examples/bratu_mpi.jl index a635f51..4d69c8d 100644 --- a/examples/bratu_mpi.jl +++ b/examples/bratu_mpi.jl @@ -9,10 +9,48 @@ using SparseArrays, LinearAlgebra using OffsetArrays using MPI -MPI.Init() +struct LocalData{FC, D} <: AbstractVector{FC} + data::D -const nranks = MPI.Comm_size(MPI.COMM_WORLD) -const myid = MPI.Comm_rank(MPI.COMM_WORLD) + function LocalData(data::D) where {D} + FC = eltype(data) + return new{FC, D}(data) + end +end + +function LocalData{FC, D}(::UndefInitializer, l::Int64) where {FC, D} + return LocalData(D(undef, l)) +end + +Base.length(v::LocalData) = return length(v.data) + +Base.size(v::LocalData) = return size(v.data) + +Base.@propagate_inbounds function Base.getindex(v::LocalData, idx) + return getindex(v.data, idx) +end + +Base.@propagate_inbounds function Base.setindex!(v::LocalData, x, idx) + return setindex!(v.data, x, idx) +end + +Base.similar(v::LocalData) = LocalData(similar(v.data)) +Base.copy(v::LocalData) = LocalData(copy(v.data)) +function Base.cconvert(::Type{MPI.MPIPtr}, buf::LocalData) + return buf.data +end + +using Krylov +import Krylov.FloatOrComplex + +function Krylov.kdot(n::Integer, x::LocalData{T}, y::LocalData{T}) where {T <: FloatOrComplex} + return MPI.Allreduce(dot(x.data, y.data), +, MPI.COMM_WORLD) +end + +function Krylov.knorm(n::Integer, x::LocalData{T}) where {T <: FloatOrComplex} + # TODO: We need to not double count the boundary + return sqrt(MPI.Allreduce(sum(abs2, x.data), +, MPI.COMM_WORLD)) +end # We are going to split a domain of size N # into equal chunks. @@ -24,28 +62,12 @@ function localdomain(N, nranks, myrank) return first:last end -# ## Choice of parameters -const N = 10_000 -const λ = 3.51382 -const dx = 1 / (N + 1) # Grid-spacing - -# ### Domain and Inital condition -LI = localdomain(N, nranks, myid) -const l_N = length(LI) -@show l_N -u₀ = Vector{Float64}(undef, l_N + 2) - -X = LinRange(0.0, 1.0, N) # Global -x = view(X, LI) - -u₀[2:(l_N + 1)] = sin.(x .* π) -U₀ = MPI.Gather(view(u₀, 2:(l_N + 1)), MPI.COMM_WORLD) # ## 1D Bratu equations # $y′′ + λ * exp(y) = 0$ -function update!(_y, N) - y = OffsetArray(_y, 0:(N + 1)) +function update!(_y::LocalData, N) + y = OffsetArray(_y.data, 0:(N + 1)) # Set boundary conditions nranks = MPI.Comm_size(MPI.COMM_WORLD) @@ -80,6 +102,9 @@ function bratu!(_res, _y, Δx, λ, N) res = OffsetArray(_res, 0:(N + 1)) y = OffsetArray(_y, 0:(N + 1)) + # res is N+2 and knorm doesn't know that + res[N + 1] = 0 + res[0] = 0 ## Calculate residual for i in 1:N y_l = y[i - 1] @@ -104,19 +129,56 @@ function true_sol_bratu(x) return -2 * log(cosh(θ * (x - 0.5) / 2) / (cosh(θ / 4))) end -u₀ = OffsetArray(u₀, 0:(l_N + 1)) +# # Setup +MPI.Init() + +const nranks = MPI.Comm_size(MPI.COMM_WORLD) +const myid = MPI.Comm_rank(MPI.COMM_WORLD) + +# ## Choice of parameters +const N = 10_000 +const λ = 3.51382 +const dx = 1 / (N + 1) # Grid-spacing + +# ### Domain and Inital condition +LI = localdomain(N, nranks, myid) +const l_N = length(LI) +u₀ = Vector{Float64}(undef, l_N + 2) + +X = LinRange(0.0 + dx, 1.0 - dx, N) # Global +x = view(X, LI) + +u₀[2:(l_N + 1)] = sin.(x .* π) + +u₀ = LocalData(u₀) #, 0:(l_N + 1)) update!(u₀, l_N) +U₀ = MPI.Gather(view(u₀, 2:(l_N + 1)), MPI.COMM_WORLD) + +@show Krylov.knorm(length(u₀), u₀) +if myid == 0 + @show Krylov.knorm(length(U₀), U₀) +end +@show typeof(similar(u₀)) +@show typeof(copy(u₀)) + # ## Solving using inplace variant and CG uₖ, _ = newton_krylov!( (res, u) -> bratu!(res, u, dx, λ, l_N), copy(u₀), similar(u₀); Solver = CgSolver, update! = (u) -> update!(u, l_N), - norm = u -> sqrt(MPI.Allreduce(sum(abs2, u), +, MPI.COMM_WORLD)) + norm = u -> Krylov.knorm(length(u), u), + verbose = myid == 0 ? 1 : 0 ) Uₖ = MPI.Gather(view(uₖ, 2:(l_N + 1)), MPI.COMM_WORLD) + +@show Krylov.knorm(length(uₖ), uₖ) +if myid == 0 + @show Krylov.knorm(length(Uₖ), Uₖ) +end + if myid == 0 using CairoMakie @@ -133,7 +195,7 @@ if myid == 0 axislegend(ax, position = :cb) ax = Axis(fig[1, 2], title = "Error", ylabel = "abs2 err", xlabel = "") - lines!(ax, ϵ) + lines!(ax, X, ϵ) - save("bratu_mpi.png", fig) + save("bratu_mpi_n$(nranks).png", fig) end diff --git a/src/NewtonKrylov.jl b/src/NewtonKrylov.jl index 66ecdba..76f7ea7 100644 --- a/src/NewtonKrylov.jl +++ b/src/NewtonKrylov.jl @@ -171,7 +171,7 @@ function newton_krylov!( update! = (u) -> nothing ) t₀ = time_ns() - + update!(u) # Impose BC F!(res, u) # res = F(u) n_res = norm(res) @@ -206,7 +206,9 @@ function newton_krylov!( # Solve: Jx = -res # res is modifyed by J, so we create a copy `-res` # TODO: provide a temporary storage for `-res` - solve!(solver, J, -res; kwargs...) + b = copy(res) + b .*= - 1 + solve!(solver, J, b; kwargs...) d = solver.x # Newton direction s = 1 # Newton step TODO: LineSearch From de64ea898b70ce7418f261e8107300042c26369d Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 3 Dec 2024 15:24:29 +0100 Subject: [PATCH 5/7] WIP: MPI krylov --- examples/bratu_mpi.jl | 105 ++++++++++++++++++++++-------------------- examples/simple.jl | 5 ++ src/NewtonKrylov.jl | 15 +++--- 3 files changed, 68 insertions(+), 57 deletions(-) diff --git a/examples/bratu_mpi.jl b/examples/bratu_mpi.jl index 4d69c8d..5c9e02f 100644 --- a/examples/bratu_mpi.jl +++ b/examples/bratu_mpi.jl @@ -19,37 +19,43 @@ struct LocalData{FC, D} <: AbstractVector{FC} end function LocalData{FC, D}(::UndefInitializer, l::Int64) where {FC, D} - return LocalData(D(undef, l)) + return LocalData(D(undef, l + 2)) end -Base.length(v::LocalData) = return length(v.data) - -Base.size(v::LocalData) = return size(v.data) +Base.length(v::LocalData) = return length(v.data) - 2 +Base.size(v::LocalData) = return (length(v),) Base.@propagate_inbounds function Base.getindex(v::LocalData, idx) - return getindex(v.data, idx) + return getindex(v.data, idx + 1) end Base.@propagate_inbounds function Base.setindex!(v::LocalData, x, idx) - return setindex!(v.data, x, idx) + return setindex!(v.data, x, idx + 1) end Base.similar(v::LocalData) = LocalData(similar(v.data)) Base.copy(v::LocalData) = LocalData(copy(v.data)) +Base.zero(v::LocalData) = LocalData(zero(v.data)) + function Base.cconvert(::Type{MPI.MPIPtr}, buf::LocalData) - return buf.data + return view(buf.data, 2:(length(buf) - 1)) +end +function MPI.Buffer(v::LocalData) + return MPI.Buffer(v, Cint(length(v)), MPI.Datatype(eltype(v))) end using Krylov import Krylov.FloatOrComplex function Krylov.kdot(n::Integer, x::LocalData{T}, y::LocalData{T}) where {T <: FloatOrComplex} - return MPI.Allreduce(dot(x.data, y.data), +, MPI.COMM_WORLD) + return @views MPI.Allreduce( + dot(x.data[2:(length(x) + 1)], y.data[2:(length(y) + 1)]), + +, MPI.COMM_WORLD + ) end function Krylov.knorm(n::Integer, x::LocalData{T}) where {T <: FloatOrComplex} - # TODO: We need to not double count the boundary - return sqrt(MPI.Allreduce(sum(abs2, x.data), +, MPI.COMM_WORLD)) + return @views sqrt(MPI.Allreduce(sum(abs2, x.data[2:(length(x) + 1)]), +, MPI.COMM_WORLD)) end # We are going to split a domain of size N @@ -66,9 +72,8 @@ end # ## 1D Bratu equations # $y′′ + λ * exp(y) = 0$ -function update!(_y::LocalData, N) - y = OffsetArray(_y.data, 0:(N + 1)) - +function update!(y::LocalData) + N = length(y) # Set boundary conditions nranks = MPI.Comm_size(MPI.COMM_WORLD) myid = MPI.Comm_rank(MPI.COMM_WORLD) @@ -83,28 +88,38 @@ function update!(_y::LocalData, N) ## domain splitting # reqs = MPI.MultiRequest(4) Enzyme strugles with this - reqs = [MPI.Request() for _ in 1:4] + # reqs = [MPI.Request() for _ in 1:4] + # if myid != (nranks - 1) + # # Send & data to the right + # MPI.Isend(view(y.data, N + 1), MPI.COMM_WORLD, reqs[1]; dest = myid + 1) + # MPI.Irecv!(view(y.data, N + 2), MPI.COMM_WORLD, reqs[2]; source = myid + 1) + # end + # if myid != 0 + # # Send data to the left + # MPI.Isend(view(y.data, 2), MPI.COMM_WORLD, reqs[3]; dest = myid - 1) + # MPI.Irecv!(view(y.data, 1), MPI.COMM_WORLD, reqs[4]; source = myid - 1) + # end + # MPI.Waitall(reqs) + if myid != (nranks - 1) # Send & data to the right - MPI.Isend(view(y, N), MPI.COMM_WORLD, reqs[1]; dest = myid + 1) - MPI.Irecv!(view(y, N + 1), MPI.COMM_WORLD, reqs[2]; source = myid + 1) + MPI.Send(view(y.data, N + 1), MPI.COMM_WORLD; dest = myid + 1) + MPI.Recv!(view(y.data, N + 2), MPI.COMM_WORLD; source = myid + 1) end if myid != 0 # Send data to the left - MPI.Isend(view(y, 1), MPI.COMM_WORLD, reqs[3]; dest = myid - 1) - MPI.Irecv!(view(y, 0), MPI.COMM_WORLD, reqs[4]; source = myid - 1) + MPI.Recv!(view(y.data, 1), MPI.COMM_WORLD; source = myid - 1) + MPI.Send(view(y.data, 2), MPI.COMM_WORLD; dest = myid - 1) end - MPI.Waitall(reqs) + return nothing end -function bratu!(_res, _y, Δx, λ, N) - res = OffsetArray(_res, 0:(N + 1)) - y = OffsetArray(_y, 0:(N + 1)) - - # res is N+2 and knorm doesn't know that - res[N + 1] = 0 - res[0] = 0 +function bratu!(res::LocalData, y::LocalData, Δx, λ) + update!(y) + # Must be part of the equation since otherwise the krylov solver + # Will not update the boundary conditions for the inner iterations + N = length(y) ## Calculate residual for i in 1:N y_l = y[i - 1] @@ -116,9 +131,9 @@ function bratu!(_res, _y, Δx, λ, N) return nothing end -function bratu(y, dx, λ, N) +function bratu(y, dx, λ) res = similar(y) - bratu!(res, y, dx, λ, N) + bratu!(res, y, dx, λ) return res end @@ -148,36 +163,28 @@ u₀ = Vector{Float64}(undef, l_N + 2) X = LinRange(0.0 + dx, 1.0 - dx, N) # Global x = view(X, LI) -u₀[2:(l_N + 1)] = sin.(x .* π) +u₀ = LocalData(u₀) +u₀ .= sin.(x .* π) +update!(u₀) -u₀ = LocalData(u₀) #, 0:(l_N + 1)) -update!(u₀, l_N) +# JOp = NewtonKrylov.JacobianOperator((res, u) -> bratu!(res, u, dx, λ), similar(u₀), u₀) +# J = collect(JOp) +# display(J) -U₀ = MPI.Gather(view(u₀, 2:(l_N + 1)), MPI.COMM_WORLD) - -@show Krylov.knorm(length(u₀), u₀) -if myid == 0 - @show Krylov.knorm(length(U₀), U₀) -end -@show typeof(similar(u₀)) -@show typeof(copy(u₀)) +U₀ = MPI.Gather(u₀, MPI.COMM_WORLD) # ## Solving using inplace variant and CG -uₖ, _ = newton_krylov!( - (res, u) -> bratu!(res, u, dx, λ, l_N), +uₖ, stats = newton_krylov!( + (res, u) -> bratu!(res, u, dx, λ), copy(u₀), similar(u₀); Solver = CgSolver, - update! = (u) -> update!(u, l_N), - norm = u -> Krylov.knorm(length(u), u), + # update! = (u) -> update!(u), + norm = v -> Krylov.knorm(length(v), v), verbose = myid == 0 ? 1 : 0 ) +@show stats -Uₖ = MPI.Gather(view(uₖ, 2:(l_N + 1)), MPI.COMM_WORLD) - -@show Krylov.knorm(length(uₖ), uₖ) -if myid == 0 - @show Krylov.knorm(length(Uₖ), Uₖ) -end +Uₖ = MPI.Gather(uₖ, MPI.COMM_WORLD) if myid == 0 using CairoMakie diff --git a/examples/simple.jl b/examples/simple.jl index 5347cab..05d862f 100644 --- a/examples/simple.jl +++ b/examples/simple.jl @@ -48,3 +48,8 @@ end lines!(ax, trace_3) fig + +# ## Extract Jacobian + +JOp = NewtonKrylov.JacobianOperator(F!, zeros(2), [3.0, 4.0]) +J = collect(JOp) diff --git a/src/NewtonKrylov.jl b/src/NewtonKrylov.jl index 76f7ea7..ab9cfa2 100644 --- a/src/NewtonKrylov.jl +++ b/src/NewtonKrylov.jl @@ -67,12 +67,12 @@ end function Base.collect(JOp::JacobianOperator) N, M = size(JOp) - v = zeros(eltype(JOp), M) - out = zeros(eltype(JOp), N) + v = zero(JOp.u) + out = zero(JOp.res) J = SparseMatrixCSC{eltype(v), Int}(undef, size(JOp)...) for j in 1:M - out .= 0.0 - v .= 0.0 + out .= 0 + v .= 0 v[j] = 1.0 mul!(out, JOp, v) for i in 1:N @@ -187,6 +187,7 @@ function newton_krylov!( J = JacobianOperator(F!, res, u) solver = Solver(J, res) + b = similar(res) stats = Stats(0, 0) while n_res > tol && stats.outer_iterations <= max_niter @@ -205,9 +206,7 @@ function newton_krylov!( # Solve: Jx = -res # res is modifyed by J, so we create a copy `-res` - # TODO: provide a temporary storage for `-res` - b = copy(res) - b .*= - 1 + b .= .- res solve!(solver, J, b; kwargs...) d = solver.x # Newton direction @@ -233,7 +232,7 @@ function newton_krylov!( η = forcing(η, tol, n_res, n_res_prior) end - verbose > 0 && @info "Newton" iter = n_res η stats + verbose > 0 && @info "Newton" resᵢ = n_res η stats stats = update(stats, solver.stats.niter) end t = (time_ns() - t₀) / 1.0e9 From c4ffbde54a78492df7e953c4f549e81b7100874b Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 17 Dec 2024 12:42:58 +0100 Subject: [PATCH 6/7] add FDJacobainOperator --- src/NewtonKrylov.jl | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/NewtonKrylov.jl b/src/NewtonKrylov.jl index ab9cfa2..09aeab4 100644 --- a/src/NewtonKrylov.jl +++ b/src/NewtonKrylov.jl @@ -19,7 +19,9 @@ function maybe_duplicated(f, df) end end -struct JacobianOperator{F, A} +abstract type AbstractJacobianOperator end + +struct JacobianOperator{F, A} <: AbstractJacobianOperator f::F # F!(res, u) f_cache::F res::A @@ -65,7 +67,7 @@ function mul!(out, J′::Union{Adjoint{<:Any, <:JacobianOperator}, Transpose{<:A return nothing end -function Base.collect(JOp::JacobianOperator) +function Base.collect(JOp::AbstractJacobianOperator) N, M = size(JOp) v = zero(JOp.u) out = zero(JOp.res) @@ -84,6 +86,29 @@ function Base.collect(JOp::JacobianOperator) return J end +struct FDJacobianOperator{F, A} <: AbstractJacobianOperator + f::F # F!(res, u) + res::A + u::A + function FDJacobianOperator(f::F, res, u) where {F} + return new{F, typeof(u)}(f, res, u) + end +end + +Base.size(J::FDJacobianOperator) = (length(J.res), length(J.u)) +Base.eltype(J::FDJacobianOperator) = eltype(J.u) + +function mul!(out, J::FDJacobianOperator, v) + ϵ = cbrt(eps()/2) + # (F(u + ϵ .* v) - F(u - ϵ .* v)) ./ 2ϵ + + J.f(J.res, J.u .+ ϵ .* v) + out .= J.res + J.f(J.res, J.u .- ϵ .* v) + out .= (out .- J.res) ./ 2ϵ + return nothing +end + ## # Newton-Krylov ## From 4cd3ac3c1f6576a94c1c68a59d72b886cd10f05c Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 17 Dec 2024 12:43:41 +0100 Subject: [PATCH 7/7] fixup! add FDJacobainOperator --- src/NewtonKrylov.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/NewtonKrylov.jl b/src/NewtonKrylov.jl index 09aeab4..c999f4c 100644 --- a/src/NewtonKrylov.jl +++ b/src/NewtonKrylov.jl @@ -193,7 +193,8 @@ function newton_krylov!( krylov_kwargs = (;), callback = (args...) -> nothing, norm = norm, - update! = (u) -> nothing + update! = (u) -> nothing, + Operator = JacobianOperator, ) t₀ = time_ns() @@ -210,7 +211,7 @@ function newton_krylov!( verbose > 0 && @info "Jacobian-Free Newton-Krylov" Solver res₀ = n_res tol tol_rel tol_abs η - J = JacobianOperator(F!, res, u) + J = Operator(F!, res, u) solver = Solver(J, res) b = similar(res)