diff --git a/Project.toml b/Project.toml
index 18054ccf8..5d5c7a088 100644
--- a/Project.toml
+++ b/Project.toml
@@ -13,8 +13,8 @@ HalfIntegers = "f0d1745a-41c9-11e9-1dd9-e5d34d218721"
 KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36"
+OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5"
 OptimKit = "77e91f04-9b3b-57a6-a776-40b61faaebe0"
-Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
@@ -35,10 +35,10 @@ HalfIntegers = "1.6.0"
 KrylovKit = "0.8.3"
 LinearAlgebra = "1.6"
 LoggingExtras = "~1.0"
+OhMyThreads = "0.7.0"
 OptimKit = "0.3.1"
 Pkg = "1"
 Plots = "1.40"
-Preferences = "1"
 Printf = "1"
 Random = "1"
 RecipesBase = "1.1"
diff --git a/docs/src/man/parallelism.md b/docs/src/man/parallelism.md
index 072d92f72..75bf42cc6 100644
--- a/docs/src/man/parallelism.md
+++ b/docs/src/man/parallelism.md
@@ -81,31 +81,22 @@ BLAS: libmkl_rt.so
 
 ## MPSKit multithreading
 
-Within MPSKit, when Julia is started with multiple threads, by default the `Threads.@spawn`
-machinery will be used to parallelize the code as much as possible. In particular, there are
-three main places where this is happening, which can be disabled separately through a preference-based system.
+Within MPSKit, when Julia is started with multiple threads, by default the `OhMyThreads.jl`
+machinery will be used to parallelize the code as much as possible. In particular, this mostly
+occurs whenever the state has a nontrivial unit cell, in which case the local updates can
+take place at each site in parallel.
 
-1. During the process of some algorithms (e.g. VUMPS), local updates can take place at each
-   site in parallel. This can be controlled by the `parallelize_sites` preference.
-
-2. During the calculation of the environments, when the MPO is block-sparse, it is possible
-   to parallelize over these blocks. This can be enabled or disabled by the
-   `parallelize_transfers` preference. (Note that left- and right environments will always
-   be computed in parallel)
-
-3. During the calculation of the derivatives, when the MPO is block-sparse, it is possible
-   to parallelize over these blocks. This can be enabled or disabled by the
-   `parallelize_derivatives` preference.
-
-For convenience, these preferences can be set via [`MPSKit.Defaults.set_parallelization`](@ref), which takes as input pairs of preferences and booleans. For example, to disable all parallelization, one can call
+The multithreading behaviour can be controlled through a global `scheduler`, which can be set
+using the `MPSKit.Defaults.set_scheduler!(arg; kwargs...)` function. This function accepts
+either a `Symbol`, an `OhMyThreads.Scheduler` instance, or keyword arguments from which a
+scheduler is determined automatically.
 
 ```julia
-Defaults.set_parallelization("sites" => false, "transfers" => false, "derivatives" => false)
+MPSKit.Defaults.set_scheduler!(:serial) # disable multithreading
+MPSKit.Defaults.set_scheduler!(:greedy) # multithreading with greedy load-balancing
+MPSKit.Defaults.set_scheduler!(:dynamic) # default: multithreading with some load-balancing
```
 
-!!! warning
-    These settings are statically set at compile-time, and for changes to take
-    effect the Julia session must be restarted.
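+
+As an illustrative sketch (assuming only the scheduler types that `OhMyThreads.jl` exports,
+such as `StaticScheduler`), a fully constructed scheduler instance can also be passed directly:
+
+```julia
+using OhMyThreads: StaticScheduler
+
+# statically assign the per-site updates to the available threads, without load-balancing
+MPSKit.Defaults.set_scheduler!(StaticScheduler())
+```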
+For further reference on the available schedulers and finer control, please refer to the
+[`OhMyThreads.jl` documentation](https://juliafolds2.github.io/OhMyThreads.jl/stable/).
 
 ## TensorKit multithreading
 
diff --git a/src/MPSKit.jl b/src/MPSKit.jl
index b17881683..338b43b44 100644
--- a/src/MPSKit.jl
+++ b/src/MPSKit.jl
@@ -17,6 +17,7 @@ using LinearAlgebra: LinearAlgebra
 using Random
 using Base: @kwdef
 using LoggingExtras
+using OhMyThreads
 
 # bells and whistles for mpses
 export InfiniteMPS, FiniteMPS, WindowMPS, MultilineMPS
@@ -159,4 +160,9 @@ include("algorithms/ED.jl")
 
 include("algorithms/unionalg.jl")
 
+function __init__()
+    Defaults.set_scheduler!()
+    return nothing
+end
+
 end
diff --git a/src/algorithms/approximate/vomps.jl b/src/algorithms/approximate/vomps.jl
index 435996c4d..d8d6f5269 100644
--- a/src/algorithms/approximate/vomps.jl
+++ b/src/algorithms/approximate/vomps.jl
@@ -20,21 +20,14 @@ function approximate(ψ::MultilineMPS, toapprox::Tuple{<:MultilineMPO,<:Multilin
                      alg::VOMPS, envs=environments(ψ, toapprox))
     ϵ::Float64 = calc_galerkin(ψ, envs)
     temp_ACs = similar.(ψ.AC)
+    scheduler = Defaults.scheduler[]
 
     log = IterLog("VOMPS")
     LoggingExtras.withlevel(; alg.verbosity) do
         @infov 2 loginit!(log, ϵ)
         for iter in 1:(alg.maxiter)
-            @static if Defaults.parallelize_sites
-                @sync for col in 1:size(ψ, 2)
-                    Threads.@spawn begin
-                        temp_ACs[:, col] = _vomps_localupdate(col, ψ, toapprox, envs)
-                    end
-                end
-            else
-                for col in 1:size(ψ, 2)
-                    temp_ACs[:, col] = _vomps_localupdate(col, ψ, toapprox, envs)
-                end
+            tmap!(eachcol(temp_ACs), 1:size(ψ, 2); scheduler) do col
+                return _vomps_localupdate(col, ψ, toapprox, envs)
             end
 
             alg_gauge = updatetol(alg.alg_gauge, iter, ϵ)
@@ -64,7 +57,10 @@ end
 
 function _vomps_localupdate(loc, ψ, (O, ψ₀), envs, factalg=QRpos())
     local tmp_AC, tmp_C
-    @static if Defaults.parallelize_sites
+    if Defaults.scheduler[] isa SerialScheduler
+        tmp_AC = circshift([ac_proj(row, loc, ψ, envs) for row in 1:size(ψ, 1)], 1)
+        tmp_C = circshift([c_proj(row, loc, ψ, envs) for row in 1:size(ψ, 1)], 1)
+    else
         @sync begin
             Threads.@spawn begin
                 tmp_AC = circshift([ac_proj(row, loc, ψ, envs) for row in 1:size(ψ, 1)], 1)
@@ -73,9 +69,6 @@ function _vomps_localupdate(loc, ψ, (O, ψ₀), envs, factalg=QRpos())
                 tmp_C = circshift([c_proj(row, loc, ψ, envs) for row in 1:size(ψ, 1)], 1)
             end
         end
-    else
-        tmp_AC = circshift([ac_proj(row, loc, ψ, envs) for row in 1:size(ψ, 1)], 1)
-        tmp_C = circshift([c_proj(row, loc, ψ, envs) for row in 1:size(ψ, 1)], 1)
     end
     return regauge!.(tmp_AC, tmp_C; alg=factalg)
 end
diff --git a/src/algorithms/changebonds/svdcut.jl b/src/algorithms/changebonds/svdcut.jl
index 9850e1111..e7cc1d871 100644
--- a/src/algorithms/changebonds/svdcut.jl
+++ b/src/algorithms/changebonds/svdcut.jl
@@ -91,7 +91,7 @@ function changebonds(ψ::InfiniteMPS, alg::SvdCut)
     ψ = if space(ncr, 1) != space(copied[1], 1)
         InfiniteMPS(copied)
     else
-        C₀ = TensorMap(complex(ncr))
+        C₀ = ncr isa TensorMap ? complex(ncr) : TensorMap(complex(ncr))
         InfiniteMPS(copied, C₀)
     end
     return normalize!(ψ)
diff --git a/src/algorithms/derivatives.jl b/src/algorithms/derivatives.jl
index 0108df218..ad562a799 100644
--- a/src/algorithms/derivatives.jl
+++ b/src/algorithms/derivatives.jl
@@ -105,25 +105,6 @@ function ∂AC2(x::Vector, opp1, opp2, leftenv, rightenv)
     return circshift(map(∂AC2, x, opp1, opp2, leftenv, rightenv), 1)
 end
 
-"""
-    Zero-site derivative (the C matrix to the right of pos)
-"""
-function ∂C(x::MPSBondTensor, leftenv::AbstractVector, rightenv::AbstractVector)::typeof(x)
-    if Defaults.parallelize_derivatives
-        @floop WorkStealingEx() for (le, re) in zip(leftenv, rightenv)
-            t = ∂C(x, le, re)
-            @reduce(y = inplace_add!(nothing, t))
-        end
-    else
-        y = ∂C(x, leftenv[1], rightenv[1])
-        for (le, re) in zip(leftenv[2:end], rightenv[2:end])
-            VectorInterface.add!(y, ∂C(x, le, re))
-        end
-    end
-
-    return y
-end
-
 function ∂C(x::MPSBondTensor, leftenv::MPSTensor, rightenv::MPSTensor)
     @plansor y[-1; -2] := leftenv[-1 3; 1] * x[1; 2] * rightenv[2 3; -2]
     return y isa BlockTensorMap ? only(y) : y
diff --git a/src/algorithms/excitation/quasiparticleexcitation.jl b/src/algorithms/excitation/quasiparticleexcitation.jl
index a6158dab7..e93ddf961 100644
--- a/src/algorithms/excitation/quasiparticleexcitation.jl
+++ b/src/algorithms/excitation/quasiparticleexcitation.jl
@@ -247,19 +247,9 @@ function effective_excitation_hamiltonian(H::Union{InfiniteMPOHamiltonian,
                                                    envs.leftenvs, envs.rightenvs))
 
     ϕ′ = similar(ϕ)
-    @static if Defaults.parallelize_sites
-        @sync for loc in 1:length(ϕ)
-            Threads.@spawn begin
-                ϕ′[loc] = _effective_excitation_local_apply(loc, ϕ, H, energy[loc],
-                                                            envs)
-            end
-        end
-    else
-        for loc in 1:length(ϕ)
-            ϕ′[loc] = _effective_excitation_local_apply(loc, ϕ, H, energy[loc], envs)
-        end
+    tforeach(1:length(ϕ); scheduler=Defaults.scheduler[]) do loc
+        return ϕ′[loc] = _effective_excitation_local_apply(loc, ϕ, H, energy[loc], envs)
     end
-
     return ϕ′
 end
diff --git a/src/algorithms/grassmann.jl b/src/algorithms/grassmann.jl
index b8f9f2e3a..b1efe5a42 100644
--- a/src/algorithms/grassmann.jl
+++ b/src/algorithms/grassmann.jl
@@ -12,6 +12,7 @@ module GrassmannMPS
 
 using ..MPSKit
 using TensorKit
+using OhMyThreads
 import TensorKitManifolds.Grassmann
 
 function TensorKit.rmul!(a::Grassmann.GrassmannTangent, b::AbstractTensorMap)
@@ -67,23 +68,38 @@ struct ManifoldPoint{T,E,G,C}
     Rhoreg::C # the regularized density matrices
 end
 
-function ManifoldPoint(state::Union{InfiniteMPS,FiniteMPS}, envs)
+function ManifoldPoint(state::FiniteMPS, envs)
     al_d = similar(state.AL)
     O = envs.operator
     for i in 1:length(state)
         al_d[i] = MPSKit.∂∂AC(i, state, O, envs) * state.AC[i]
     end
-
     g = Grassmann.project.(al_d, state.AL)
 
     Rhoreg = Vector{eltype(state.C)}(undef, length(state))
     δmin = sqrt(eps(real(scalartype(state))))
-    for i in 1:length(state)
-        Rhoreg[i] = regularize(state.C[i], max(norm(g[i]) / 10, δmin))
+    tmap!(Rhoreg, 1:length(state); scheduler=MPSKit.Defaults.scheduler[]) do i
+        return regularize(state.C[i], max(norm(g[i]) / 10, δmin))
     end
 
     return ManifoldPoint(state, envs, g, Rhoreg)
 end
 
+function ManifoldPoint(state::InfiniteMPS, envs)
+    δmin = sqrt(eps(real(scalartype(state))))
+    Tg = Core.Compiler.return_type(Grassmann.project,
+                                   Tuple{eltype(state.AL),eltype(state.AL)})
+    g = similar(state.AL, Tg)
+    ρ = similar(state.C)
+
+    MPSKit.check_recalculate!(envs, state)
+    tforeach(1:length(state); scheduler=MPSKit.Defaults.scheduler[]) do i
+        AC′ = MPSKit.∂∂AC(i, state, envs.operator, envs) * state.AC[i]
+        g[i] = Grassmann.project(AC′, state.AL[i])
+        ρ[i] = regularize(state.C[i], max(norm(g[i]) / 10, δmin))
+        return nothing
+    end
+    return ManifoldPoint(state, envs, g, ρ)
+end
 
 function ManifoldPoint(state::MultilineMPS, envs)
     # FIXME: add support for unitcells
@@ -115,10 +131,10 @@ cell as tangent vectors on Grassmann manifolds.
 """
 function fg(x::ManifoldPoint{T}) where {T<:Union{InfiniteMPS,FiniteMPS}}
     # the gradient I want to return is the preconditioned gradient!
-    g_prec = Vector{PrecGrad{eltype(x.g),eltype(x.Rhoreg)}}(undef, length(x.g))
-
-    for i in 1:length(x.state)
-        g_prec[i] = PrecGrad(rmul!(copy(x.g[i]), x.state.C[i]'), x.Rhoreg[i])
+    Tg = Core.Compiler.return_type(PrecGrad, Tuple{eltype(x.g),eltype(x.Rhoreg)})
+    g_prec = similar(x.g, Tg)
+    tmap!(g_prec, eachindex(x.g); scheduler=MPSKit.Defaults.scheduler[]) do i
+        return PrecGrad(rmul!(copy(x.g[i]), x.state.C[i]'), x.Rhoreg[i])
     end
 
     # TODO: the operator really should not be part of the environments, and this should
@@ -151,10 +167,10 @@ function retract(x::ManifoldPoint{<:MultilineMPS}, tg, alpha)
     g = reshape(tg, size(x.state))
 
     nal = similar(x.state.AL)
-    h = similar(g)
-    for (i, cg) in enumerate(tg)
-        (nal[i], th) = Grassmann.retract(x.state.AL[i], cg.Pg, alpha)
-        h[i] = PrecGrad(th)
+    h = similar(tg)
+    tmap!(h, eachindex(g); scheduler=MPSKit.Defaults.scheduler[]) do i
+        nal[i], th = Grassmann.retract(x.state.AL[i], g[i].Pg, alpha)
+        return PrecGrad(th)
     end
 
     nstate = MPSKit.MultilineMPS(nal, x.state.C[:, end])
@@ -171,9 +187,10 @@ function retract(x::ManifoldPoint{<:InfiniteMPS}, g, alpha)
     envs = x.envs
     nal = similar(state.AL)
     h = similar(g) # The tangent at the end-point
-    for i in 1:length(g)
+
+    tmap!(h, eachindex(g); scheduler=MPSKit.Defaults.scheduler[]) do i
         nal[i], th = Grassmann.retract(state.AL[i], g[i].Pg, alpha)
-        h[i] = PrecGrad(th)
+        return PrecGrad(th)
     end
 
     nstate = InfiniteMPS(nal, state.C[end])
@@ -209,9 +226,9 @@ Transport a tangent vector `h` along the retraction from `x` in direction `g`
 by `alpha`. `xp` is the end-point of the retraction.
 """
 function transport!(h, x, g, alpha, xp)
-    for i in 1:length(h)
-        h[i] = PrecGrad(Grassmann.transport!(h[i].Pg, x.state.AL[i], g[i].Pg, alpha,
-                                             xp.state.AL[i]))
+    tforeach(1:length(h); scheduler=MPSKit.Defaults.scheduler[]) do i
+        return h[i] = PrecGrad(Grassmann.transport!(h[i].Pg, x.state.AL[i], g[i].Pg, alpha,
+                                                    xp.state.AL[i]))
     end
     return h
 end
diff --git a/src/algorithms/groundstate/gradient_grassmann.jl b/src/algorithms/groundstate/gradient_grassmann.jl
index 8ee1e37c9..df0c3d15d 100644
--- a/src/algorithms/groundstate/gradient_grassmann.jl
+++ b/src/algorithms/groundstate/gradient_grassmann.jl
@@ -28,13 +28,17 @@ struct GradientGrassmann{O<:OptimKit.OptimizationAlgorithm,F} <: Algorithm
     finalize!::F
 
     function GradientGrassmann(; method=ConjugateGradient, (finalize!)=OptimKit._finalize!,
-                               tol=Defaults.tol, maxiter=Defaults.maxiter, verbosity=2)
+                               tol=Defaults.tol, maxiter=Defaults.maxiter,
+                               verbosity=Defaults.verbosity - 1)
         if isa(method, OptimKit.OptimizationAlgorithm)
             # We were given an optimisation method, just use it.
            m = method
        elseif method <: OptimKit.OptimizationAlgorithm
            # We were given an optimisation method type, construct an instance of it.
-            m = method(; maxiter=maxiter, verbosity=verbosity, gradtol=tol)
+            # restrict linesearch maxiter
+            linesearch = OptimKit.HagerZhangLineSearch(; verbosity=verbosity - 2,
+                                                       maxiter=100)
+            m = method(; maxiter, verbosity, gradtol=tol, linesearch)
         else
             msg = "method should be either an instance or a subtype of `OptimKit.OptimizationAlgorithm`."
             throw(ArgumentError(msg))
diff --git a/src/algorithms/groundstate/vumps.jl b/src/algorithms/groundstate/vumps.jl
index 0957b841b..7f96bfae3 100644
--- a/src/algorithms/groundstate/vumps.jl
+++ b/src/algorithms/groundstate/vumps.jl
@@ -30,22 +30,15 @@ function find_groundstate(ψ::InfiniteMPS, H, alg::VUMPS, envs=environments(ψ,
     # initialization
     ϵ::Float64 = calc_galerkin(ψ, envs)
     temp_ACs = similar.(ψ.AC)
+    scheduler = Defaults.scheduler[]
 
     log = IterLog("VUMPS")
     LoggingExtras.withlevel(; alg.verbosity) do
         @infov 2 loginit!(log, ϵ, sum(expectation_value(ψ, H, envs)))
         for iter in 1:(alg.maxiter)
             alg_eigsolve = updatetol(alg.alg_eigsolve, iter, ϵ)
-            @static if Defaults.parallelize_sites
-                @sync for loc in 1:length(ψ)
-                    Threads.@spawn begin
-                        temp_ACs[loc] = _vumps_localupdate(loc, ψ, H, envs, alg_eigsolve)
-                    end
-                end
-            else
-                for loc in 1:length(ψ)
-                    temp_ACs[loc] = _vumps_localupdate(loc, ψ, H, envs, alg_eigsolve)
-                end
+            tmap!(temp_ACs, 1:length(ψ); scheduler) do loc
+                return _vumps_localupdate(loc, ψ, H, envs, alg_eigsolve)
             end
 
             alg_gauge = updatetol(alg.alg_gauge, iter, ϵ)
@@ -76,7 +69,10 @@ end
 
 function _vumps_localupdate(loc, ψ, H, envs, eigalg, factalg=QRpos())
     local AC′, C′
-    @static if Defaults.parallelize_sites
+    if Defaults.scheduler[] isa SerialScheduler
+        _, AC′ = fixedpoint(∂∂AC(loc, ψ, H, envs), ψ.AC[loc], :SR, eigalg)
+        _, C′ = fixedpoint(∂∂C(loc, ψ, H, envs), ψ.C[loc], :SR, eigalg)
+    else
         @sync begin
             Threads.@spawn begin
                 _, AC′ = fixedpoint(∂∂AC(loc, ψ, H, envs), ψ.AC[loc], :SR, eigalg)
@@ -85,9 +81,6 @@ function _vumps_localupdate(loc, ψ, H, envs, eigalg, factalg=QRpos())
                 _, C′ = fixedpoint(∂∂C(loc, ψ, H, envs), ψ.C[loc], :SR, eigalg)
             end
         end
-    else
-        _, AC′ = fixedpoint(∂∂AC(loc, ψ, H, envs), ψ.AC[loc], :SR, eigalg)
-        _, C′ = fixedpoint(∂∂C(loc, ψ, H, envs), ψ.C[loc], :SR, eigalg)
     end
     return regauge!(AC′, C′; alg=factalg)
 end
diff --git a/src/algorithms/statmech/vomps.jl b/src/algorithms/statmech/vomps.jl
index e9ad4b9aa..8116e0869 100644
--- a/src/algorithms/statmech/vomps.jl
+++ b/src/algorithms/statmech/vomps.jl
@@ -28,39 +28,16 @@ function leading_boundary(ψ::MultilineMPS, O::MultilineMPO, alg::VOMPS,
                           envs=environments(ψ, O))
     ϵ::Float64 = calc_galerkin(ψ, envs)
     temp_ACs = similar.(ψ.AC)
-    temp_Cs = similar.(ψ.C)
+    scheduler = Defaults.scheduler[]
 
     log = IterLog("VOMPS")
     LoggingExtras.withlevel(; alg.verbosity) do
         @infov 2 loginit!(log, ϵ, expectation_value(ψ, O, envs))
         for iter in 1:(alg.maxiter)
-            @static if Defaults.parallelize_sites
-                @sync for col in 1:size(ψ, 2)
-                    Threads.@spawn begin
-                        H_AC = ∂∂AC(col, ψ, O, envs)
-                        ac = ψ.AC[:, col]
-                        temp_ACs[:, col] .= H_AC(ac)
-                    end
-
-                    Threads.@spawn begin
-                        H_C = ∂∂C(col, ψ, O, envs)
-                        c = ψ.C[:, col]
-                        temp_Cs[:, col] .= H_C(c)
-                    end
-                end
-            else
-                for col in 1:size(ψ, 2)
-                    H_AC = ∂∂AC(col, ψ, O, envs)
-                    ac = ψ.AC[:, col]
-                    temp_ACs[:, col] .= H_AC(ac)
-
-                    H_C = ∂∂C(col, ψ, O, envs)
-                    c = ψ.C[:, col]
-                    temp_Cs[:, col] .= H_C(c)
-                end
+            tmap!(eachcol(temp_ACs), 1:size(ψ, 2); scheduler) do col
+                return _vomps_localupdate(col, ψ, O, envs)
             end
-            regauge!.(temp_ACs, temp_Cs; alg=TensorKit.QRpos())
 
             alg_gauge = updatetol(alg.alg_gauge, iter, ϵ)
             ψ = MultilineMPS(temp_ACs, ψ.C[:, end];
                             alg_gauge.tol, alg_gauge.maxiter)
@@ -85,3 +62,21 @@ function leading_boundary(ψ::MultilineMPS, O::MultilineMPO, alg::VOMPS,
 
     return ψ, envs, ϵ
 end
+
+function _vomps_localupdate(col, ψ::MultilineMPS, O::MultilineMPO, envs, factalg=QRpos())
+    local AC′, C′
+    if Defaults.scheduler[] isa SerialScheduler
+        AC′ = ∂∂AC(col, ψ, O, envs) * ψ.AC[:, col]
+        C′ = ∂∂C(col, ψ, O, envs) * ψ.C[:, col]
+    else
+        @sync begin
+            Threads.@spawn begin
+                AC′ = ∂∂AC(col, ψ, O, envs) * ψ.AC[:, col]
+            end
+            Threads.@spawn begin
+                C′ = ∂∂C(col, ψ, O, envs) * ψ.C[:, col]
+            end
+        end
+    end
+    return regauge!.(AC′, C′; alg=factalg)[:]
+end
diff --git a/src/algorithms/statmech/vumps.jl b/src/algorithms/statmech/vumps.jl
index e6e173fcf..e9baf7d6b 100644
--- a/src/algorithms/statmech/vumps.jl
+++ b/src/algorithms/statmech/vumps.jl
@@ -20,44 +20,16 @@ end
 function leading_boundary(ψ::MultilineMPS, H, alg::VUMPS, envs=environments(ψ, H))
     ϵ::Float64 = calc_galerkin(ψ, envs)
     temp_ACs = similar.(ψ.AC)
-    temp_Cs = similar.(ψ.C)
+    scheduler = Defaults.scheduler[]
 
     log = IterLog("VUMPS")
     LoggingExtras.withlevel(; alg.verbosity) do
         @infov 2 loginit!(log, ϵ, expectation_value(ψ, H, envs))
         for iter in 1:(alg.maxiter)
             alg_eigsolve = updatetol(alg.alg_eigsolve, iter, ϵ)
-            @static if Defaults.parallelize_sites
-                @sync for col in 1:size(ψ, 2)
-                    Threads.@spawn begin
-                        H_AC = ∂∂AC($col, $ψ, $H, $envs)
-                        ac = $ψ.AC[:, col]
-                        _, ac′ = fixedpoint(H_AC, ac, :LM, alg_eigsolve)
-                        $temp_ACs[:, col] = ac′[:]
-                    end
-
-                    Threads.@spawn begin
-                        H_C = ∂∂C($col, $ψ, $H, $envs)
-                        c = $ψ.C[:, col]
-                        _, c′ = fixedpoint(H_C, c, :LM, alg_eigsolve)
-                        $temp_Cs[:, col] = c′[:]
-                    end
-                end
-            else
-                for col in 1:size(ψ, 2)
-                    H_AC = ∂∂AC(col, ψ, H, envs)
-                    ac = ψ.AC[:, col]
-                    _, ac′ = fixedpoint(H_AC, ac, :LM, alg_eigsolve)
-                    temp_ACs[:, col] = ac′[:]
-
-                    H_C = ∂∂C(col, ψ, H, envs)
-                    c = ψ.C[:, col]
-                    _, c′ = fixedpoint(H_C, c, :LM, alg_eigsolve)
-                    temp_Cs[:, col] = c′[:]
-                end
+            tmap!(eachcol(temp_ACs), 1:size(ψ, 2); scheduler) do col
+                return _vumps_localupdate(col, ψ, H, envs, alg_eigsolve)
             end
-
-            regauge!.(temp_ACs, temp_Cs; alg=TensorKit.QRpos())
 
             alg_gauge = updatetol(alg.alg_gauge, iter, ϵ)
             ψ = MultilineMPS(temp_ACs, ψ.C[:, end]; alg_gauge.tol, alg_gauge.maxiter)
@@ -82,3 +54,22 @@ function leading_boundary(ψ::MultilineMPS, H, alg::VUMPS, envs=environments(ψ,
 
     return ψ, envs, ϵ
 end
+
+function _vumps_localupdate(col, ψ::MultilineMPS, O::MultilineMPO, envs, eigalg,
+                            factalg=QRpos())
+    local AC′, C′
+    if Defaults.scheduler[] isa SerialScheduler
+        _, AC′ = fixedpoint(∂∂AC(col, ψ, O, envs), ψ.AC[:, col], :LM, eigalg)
+        _, C′ = fixedpoint(∂∂C(col, ψ, O, envs), ψ.C[:, col], :LM, eigalg)
+    else
+        @sync begin
+            Threads.@spawn begin
+                _, AC′ = fixedpoint(∂∂AC(col, ψ, O, envs), ψ.AC[:, col], :LM, eigalg)
+            end
+            Threads.@spawn begin
+                _, C′ = fixedpoint(∂∂C(col, ψ, O, envs), ψ.C[:, col], :LM, eigalg)
+            end
+        end
+    end
+    return regauge!.(AC′, C′; alg=factalg)[:]
+end
diff --git a/src/algorithms/timestep/tdvp.jl b/src/algorithms/timestep/tdvp.jl
index d4d3adf44..09fe6c8f3 100644
--- a/src/algorithms/timestep/tdvp.jl
+++ b/src/algorithms/timestep/tdvp.jl
@@ -22,35 +22,38 @@ function timestep(ψ::InfiniteMPS, H, t::Number, dt::Number, alg::TDVP,
                   envs::AbstractMPSEnvironments=environments(ψ, H); leftorthflag=true)
     temp_ACs = similar(ψ.AC)
-    temp_CRs = similar(ψ.C)
+    temp_Cs = similar(ψ.C)
 
-    @static if Defaults.parallelize_sites
-        @sync for (loc, (ac, c)) in enumerate(zip(ψ.AC, ψ.C))
+    scheduler = Defaults.scheduler[]
+    if scheduler isa SerialScheduler
+        temp_ACs = tmap!(temp_ACs, 1:length(ψ); scheduler) do loc
+            return integrate(∂∂AC(loc, ψ, H, envs), ψ.AC[loc], t, dt, alg.integrator)
+        end
+        temp_Cs = tmap!(temp_Cs, 1:length(ψ); scheduler) do loc
+            return integrate(∂∂C(loc, ψ, H, envs), ψ.C[loc], t, dt, alg.integrator)
+        end
+    else
+        @sync begin
             Threads.@spawn begin
-                h_ac = ∂∂AC(loc, ψ, H, envs)
-                temp_ACs[loc] = integrate(h_ac, ac, t, dt, alg.integrator)
+                temp_ACs = tmap!(temp_ACs, 1:length(ψ); scheduler) do loc
+                    return integrate(∂∂AC(loc, ψ, H, envs), ψ.AC[loc], t, dt,
+                                     alg.integrator)
+                end
             end
-            Threads.@spawn begin
-                h_c = ∂∂C(loc, ψ, H, envs)
-                temp_CRs[loc] = integrate(h_c, c, t, dt, alg.integrator)
+            temp_Cs = tmap!(temp_Cs, 1:length(ψ); scheduler) do loc
+                return integrate(∂∂C(loc, ψ, H, envs), ψ.C[loc], t, dt, alg.integrator)
+            end
             end
         end
-    else
-        for (loc, (ac, c)) in enumerate(zip(ψ.AC, ψ.C))
-            h_ac = ∂∂AC(loc, ψ, H, envs)
-            temp_ACs[loc] = integrate(h_ac, ac, t, dt, alg.integrator)
-            h_c = ∂∂C(loc, ψ, H, envs)
-            temp_CRs[loc] = integrate(h_c, c, t, dt, alg.integrator)
-        end
     end
 
     if leftorthflag
-        regauge!.(temp_ACs, temp_CRs; alg=TensorKit.QRpos())
+        regauge!.(temp_ACs, temp_Cs; alg=TensorKit.QRpos())
         ψ′ = InfiniteMPS(temp_ACs, ψ.C[end]; tol=alg.tolgauge, maxiter=alg.gaugemaxiter)
     else
-        circshift!(temp_CRs, 1)
-        regauge!.(temp_CRs, temp_ACs; alg=TensorKit.LQpos())
+        circshift!(temp_Cs, 1)
+        regauge!.(temp_Cs, temp_ACs; alg=TensorKit.LQpos())
         ψ′ = InfiniteMPS(ψ.C[0], temp_ACs; tol=alg.tolgauge, maxiter=alg.gaugemaxiter)
     end
 
diff --git a/src/environments/abstract_envs.jl b/src/environments/abstract_envs.jl
index 21965322f..f7ce69df0 100644
--- a/src/environments/abstract_envs.jl
+++ b/src/environments/abstract_envs.jl
@@ -94,7 +94,7 @@ function recalculate!(envs::AbstractInfiniteEnvironments, state; tol=envs.solver
     end
 
     solver = envs.solver
-    envs.solver = solver.tol == tol ? solver : @set solver.tol = tol
+    envs.solver = solver.tol == tol ? solver : Accessors.@set solver.tol = tol
     envs.dependency = state
 
     @sync begin
diff --git a/src/environments/multiple_envs.jl b/src/environments/multiple_envs.jl
index ac4f8a4d4..6eb2a75b3 100644
--- a/src/environments/multiple_envs.jl
+++ b/src/environments/multiple_envs.jl
@@ -59,6 +59,13 @@ function recalculate!(env::MultipleEnvironments, args...; kwargs...)
     return env
 end
 
+function check_recalculate!(env::MultipleEnvironments, args...; kwargs...)
+    for subenv in env.envs
+        check_recalculate!(subenv, args...; kwargs...)
+    end
+    return env
+end
+
 #maybe this can be used to provide compatibility with existing code?
 function Base.getproperty(envs::MultipleEnvironments, prop::Symbol)
     if prop === :solver
diff --git a/src/utility/defaults.jl b/src/utility/defaults.jl
index 4ea6552f4..ecd74e0fb 100644
--- a/src/utility/defaults.jl
+++ b/src/utility/defaults.jl
@@ -5,8 +5,8 @@ Some default values and settings for MPSKit.
 """
 module Defaults
 
-using Preferences
 import KrylovKit: GMRES, Arnoldi, Lanczos
+using OhMyThreads
 using ..MPSKit: DynamicTol
 
 const VERBOSE_NONE = 0
@@ -66,29 +66,30 @@ function alg_expsolve(; tol=tol, maxiter=maxiter, verbosity=0,
         Arnoldi(; tol, maxiter, krylovdim, verbosity)
     end
 
-# Preferences
-# -----------
+"""
+    const scheduler
 
-function set_parallelization(options::Pair{String,Bool}...)
-    for (key, val) in options
-        if !(key in ("sites", "derivatives", "transfers"))
-            throw(ArgumentError("Invalid option: \"$(key)\""))
-        end
+A `Ref` holding the current multi-threading scheduler, typically used to parallelize over unit cells.
+This value is best controlled using [`set_scheduler!`](@ref).
+"""
+const scheduler = Ref{Scheduler}()
 
-        @set_preferences!("parallelize_$key" => val)
-    end
+"""
+    set_scheduler!([scheduler]; kwargs...)
 
-    sites = @load_preference("parallelize_sites", nothing)
-    derivatives = @load_preference("parallelize_derivatives", nothing)
-    transfers = @load_preference("parallelize_transfers", nothing)
-    @info "Parallelization changed; restart your Julia session for this change to take effect!" sites derivatives transfers
-    return nothing
-end
+Set the `OhMyThreads` multi-threading scheduler parameters.
 
-const parallelize_sites = @load_preference("parallelize_sites", Threads.nthreads() > 1)
-const parallelize_derivatives = @load_preference("parallelize_derivatives",
-                                                 Threads.nthreads() > 1)
-const parallelize_transfers = @load_preference("parallelize_transfers",
-                                               Threads.nthreads() > 1)
+The function accepts a `scheduler` either as an `OhMyThreads.Scheduler` instance or as a `Symbol`, in which case the corresponding parameters are specified as keyword arguments.
+For a detailed description of all schedulers and their keyword arguments, consult the [`OhMyThreads` documentation](https://juliafolds2.github.io/OhMyThreads.jl/stable/refs/api/#Schedulers).
+"""
+function set_scheduler!(sc=OhMyThreads.Implementation.NotGiven(); kwargs...)
+    if isempty(kwargs) && sc isa OhMyThreads.Implementation.NotGiven
+        # default value: Serial if single-threaded, Dynamic otherwise
+        scheduler[] = Threads.nthreads() == 1 ? SerialScheduler() : DynamicScheduler()
+    else
+        scheduler[] = OhMyThreads.Implementation._scheduler_from_userinput(sc; kwargs...)
+    end
+    return scheduler[]
+end
 end
diff --git a/src/utility/dynamictols.jl b/src/utility/dynamictols.jl
index 74b2665c5..34c3190d8 100644
--- a/src/utility/dynamictols.jl
+++ b/src/utility/dynamictols.jl
@@ -54,7 +54,7 @@ end
 
 # default implementation with Accessors.jl, but can be hooked into
 function _updatetol(alg, tol::Real)
-    return @set alg.tol = tol
+    return Accessors.@set alg.tol = tol
 end
 
 end
diff --git a/test/algorithms.jl b/test/algorithms.jl
index a596a093a..6d85fa129 100644
--- a/test/algorithms.jl
+++ b/test/algorithms.jl
@@ -188,7 +188,7 @@ end
 
     # test using XXZ model, Δ > 1 is gapped
     spin = 1
-    local_operators = [S_xx(; spin), S_yy(; spin), 0.7 * S_zz(; spin)]
+    local_operators = [S_xx(; spin), S_yy(; spin), 1.7 * S_zz(; spin)]
     Pspace = space(local_operators[1], 1)
     lattice = fill(Pspace, L)
@@ -205,23 +205,24 @@ end
     @testset "DMRG" begin
         # test logging passes
         ψ, envs, δ = find_groundstate(ψ₀, H_lazy,
-                                      DMRG(; tol, verbosity=5, maxiter=1))
+                                      DMRG(; tol, verbosity=verbosity_full, maxiter=1))
 
         # compare states
-        alg = DMRG(; tol, verbosity=1)
-        ψ_lazy, envs, δ = find_groundstate(ψ₀, H_lazy, alg)
+        alg = DMRG(; tol, verbosity=verbosity_conv)
+        ψ, envs, δ = find_groundstate(ψ, H_lazy, alg)
 
-        @test abs(dot(ψ₀, ψ_lazy)) ≈ 1 atol = atol
+        @test abs(dot(ψ₀, ψ)) ≈ 1 atol = atol
     end
 
     @testset "DMRG2" begin
         # test logging passes
         trscheme = truncdim(floor(Int, D * 1.5))
         ψ, envs, δ = find_groundstate(ψ₀, H_lazy,
-                                      DMRG2(; tol, verbosity=5, maxiter=1, trscheme))
+                                      DMRG2(; tol, verbosity=verbosity_full, maxiter=1,
+                                            trscheme))
 
         # compare states
-        alg = DMRG2(; tol, verbosity=1, trscheme)
+        alg = DMRG2(; tol, verbosity=verbosity_conv, trscheme)
         ψ, = find_groundstate(ψ₀, H, alg)
         ψ_lazy, envs, δ = find_groundstate(ψ₀, H_lazy, alg)
 
@@ -231,10 +232,11 @@ end
     @testset "GradientGrassmann" begin
         # test logging passes
         ψ, envs, δ = find_groundstate(ψ₀, H_lazy,
-                                      GradientGrassmann(; tol, verbosity=5, maxiter=2))
+                                      GradientGrassmann(; tol, verbosity=verbosity_full,
+                                                        maxiter=2))
 
         # compare states
-        alg = GradientGrassmann(; tol, verbosity=1)
+        alg = GradientGrassmann(; tol, verbosity=verbosity_conv)
         ψ, = find_groundstate(ψ₀, H, alg)
         ψ_lazy, envs, δ = find_groundstate(ψ₀, H_lazy, alg)
 
@@ -244,12 +246,11 @@ end
 @testset "LazySum InfiniteMPS groundstate" verbose = true begin
     tol = 1e-8
-    D = 15
+    D = 16
     atol = 1e-2
-    # test using XXZ model, Δ > 1 is gapped
     spin = 1
-    local_operators = [S_xx(; spin), S_yy(; spin), S_zz(; spin)]
+    local_operators = [S_xx(; spin), S_yy(; spin), 0.7 * S_zz(; spin)]
     Pspace = space(local_operators[1], 1)
     lattice = PeriodicVector([Pspace])
     mpo_hamiltonians = map(local_operators) do O
@@ -264,24 +265,26 @@ end
 
     @testset "VUMPS" begin
         # test logging passes
-        ψ, envs, δ = find_groundstate(ψ₀, H_lazy, VUMPS(; tol, verbosity=5, maxiter=2))
+        ψ, envs, δ = find_groundstate(ψ₀, H_lazy,
+                                      VUMPS(; tol, verbosity=verbosity_full, maxiter=2))
 
         # compare states
-        alg = VUMPS(; tol, verbosity=2)
-        ψ_lazy, envs, δ = find_groundstate(ψ₀, H_lazy, alg)
+        alg = VUMPS(; tol, verbosity=verbosity_conv)
+        ψ, envs, δ = find_groundstate(ψ, H_lazy, alg)
 
-        @test abs(dot(ψ₀, ψ_lazy)) ≈ 1 atol = atol
+        @test abs(dot(ψ₀, ψ)) ≈ 1 atol = atol
     end
 
     @testset "IDMRG1" begin
         # test logging passes
-        ψ, envs, δ = find_groundstate(ψ₀, H_lazy, IDMRG1(; tol, verbosity=5, maxiter=2))
+        ψ, envs, δ = find_groundstate(ψ₀, H_lazy,
+                                      IDMRG1(; tol, verbosity=verbosity_full, maxiter=2))
 
         # compare states
-        alg = IDMRG1(; tol, verbosity=2)
-        ψ_lazy, envs, δ = find_groundstate(ψ₀, H_lazy, alg)
+        alg = IDMRG1(; tol, verbosity=verbosity_conv, maxiter=300)
+        ψ, envs, δ = find_groundstate(ψ, H_lazy, alg)
 
-        @test abs(dot(ψ₀, ψ_lazy)) ≈ 1 atol = atol
+        @test abs(dot(ψ₀, ψ)) ≈ 1 atol = atol
     end
 
     @testset "IDMRG2" begin
@@ -292,26 +295,27 @@ end
         trscheme = truncdim(floor(Int, D * 1.5))
         # test logging passes
         ψ, envs, δ = find_groundstate(ψ₀′, H_lazy′,
-                                      IDMRG2(; tol, verbosity=5, maxiter=2, trscheme))
+                                      IDMRG2(; tol, verbosity=verbosity_full, maxiter=2,
+                                            trscheme))
 
         # compare states
-        alg = IDMRG2(; tol, verbosity=2, trscheme)
-        ψ_lazy, envs, δ = find_groundstate(ψ₀′, H_lazy′, alg)
+        alg = IDMRG2(; tol, verbosity=verbosity_conv, trscheme)
+        ψ, envs, δ = find_groundstate(ψ, H_lazy′, alg)
 
-        @test abs(dot(ψ₀′, ψ_lazy)) ≈ 1 atol = atol
+        @test abs(dot(ψ₀′, ψ)) ≈ 1 atol = atol
     end
 
     @testset "GradientGrassmann" begin
         # test logging passes
         ψ, envs, δ = find_groundstate(ψ₀, H_lazy,
-                                      GradientGrassmann(; tol, verbosity=5, maxiter=2))
+                                      GradientGrassmann(; tol, verbosity=verbosity_full,
+                                                        maxiter=2))
 
         # compare states
-        alg = GradientGrassmann(; tol, verbosity=1)
-        ψ_lazy, envs, δ = find_groundstate(ψ₀, H_lazy, alg)
-        ψ, = find_groundstate(ψ₀, H, alg)
+        alg = GradientGrassmann(; tol, verbosity=verbosity_conv)
+        ψ, envs, δ = find_groundstate(ψ₀, H_lazy, alg)
 
-        @test abs(dot(ψ₀, ψ_lazy)) ≈ 1 atol = atol
+        @test abs(dot(ψ₀, ψ)) ≈ 1 atol = atol
    end
 end
@@ -408,9 +412,9 @@ end
 
 @testset "leading_boundary" verbose = true begin
     tol = 1e-4
-    verbosity = 0
+    verbosity = verbosity_conv
     algs = [VUMPS(; tol, verbosity), VOMPS(; tol, verbosity),
-            GradientGrassmann(; verbosity)]
+            GradientGrassmann(; tol, verbosity)]
 
     mpo = force_planar(classical_ising())
     ψ₀ = InfiniteMPS([ℙ^2], [ℙ^10])
@@ -428,7 +432,8 @@ end
     @testset "infinite (ham)" begin
         H = repeat(force_planar(heisenberg_XXX()), 2)
         ψ = InfiniteMPS([ℙ^3, ℙ^3], [ℙ^48, ℙ^48])
-        ψ, envs, _ = find_groundstate(ψ, H; maxiter=400, verbosity=0, tol=1e-11)
+        ψ, envs, _ = find_groundstate(ψ, H; maxiter=400, verbosity=verbosity_conv,
+                                      tol=1e-10)
         energies, ϕs = excitations(H, QuasiparticleAnsatz(), Float64(pi), ψ, envs)
         @test energies[1] ≈ 0.41047925 atol = 1e-4
         @test variance(ϕs[1], H) < 1e-8
     end
     @testset "infinite (mpo)" begin
         H = repeat(sixvertex(), 2)
         ψ = InfiniteMPS([ℂ^2, ℂ^2], [ℂ^10, ℂ^10])
-        ψ, envs, _ = leading_boundary(ψ, H, VUMPS(; maxiter=400, verbosity=0))
+        ψ, envs, _ = leading_boundary(ψ, H,
+                                      VUMPS(; maxiter=400, verbosity=verbosity_conv,
+                                            tol=1e-10))
         energies, ϕs = excitations(H, QuasiparticleAnsatz(), [0.0, Float64(pi / 2)], ψ,
                                    envs; verbosity=0)
         @test abs(energies[1]) > abs(energies[2]) # has a minimum at pi/2
     end
     @testset "finite" begin
-        verbosity = 0
+        verbosity = verbosity_conv
         H_inf = force_planar(transverse_field_ising())
         ψ_inf = InfiniteMPS([ℙ^2], [ℙ^10])
         ψ_inf, envs, _ = find_groundstate(ψ_inf, H_inf; maxiter=400, verbosity, tol=1e-9)
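
Note for reviewers: the following is a minimal standalone sketch of the `OhMyThreads.tmap!` pattern adopted throughout this PR; `local_update`, `sites`, and `out` are illustrative placeholders, not MPSKit API.

```julia
using OhMyThreads: tmap!, SerialScheduler, DynamicScheduler

# illustrative stand-in for a per-site local update
local_update(site) = site^2

sites = collect(1:4)
out = similar(sites)

# SerialScheduler: all updates run in the calling task, with no threading overhead
tmap!(local_update, out, sites; scheduler=SerialScheduler())

# DynamicScheduler: load-balanced multithreading, the default chosen by
# `Defaults.set_scheduler!()` when `Threads.nthreads() > 1`
tmap!(local_update, out, sites; scheduler=DynamicScheduler())
```

This mirrors how the algorithm loops above replace the hand-rolled `@static if ... Threads.@spawn` branches with a single scheduler-driven call.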