diff --git a/Project.toml b/Project.toml
index 18054ccf8..5d5c7a088 100644
--- a/Project.toml
+++ b/Project.toml
@@ -13,8 +13,8 @@ HalfIntegers = "f0d1745a-41c9-11e9-1dd9-e5d34d218721"
 KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36"
+OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5"
 OptimKit = "77e91f04-9b3b-57a6-a776-40b61faaebe0"
-Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
@@ -35,10 +35,10 @@ HalfIntegers = "1.6.0"
 KrylovKit = "0.8.3"
 LinearAlgebra = "1.6"
 LoggingExtras = "~1.0"
+OhMyThreads = "0.7.0"
 OptimKit = "0.3.1"
 Pkg = "1"
 Plots = "1.40"
-Preferences = "1"
 Printf = "1"
 Random = "1"
 RecipesBase = "1.1"
diff --git a/docs/src/man/parallelism.md b/docs/src/man/parallelism.md
index 072d92f72..75bf42cc6 100644
--- a/docs/src/man/parallelism.md
+++ b/docs/src/man/parallelism.md
@@ -81,31 +81,22 @@ BLAS: libmkl_rt.so
 
 ## MPSKit multithreading
 
-Within MPSKit, when Julia is started with multiple threads, by default the `Threads.@spawn`
-machinery will be used to parallelize the code as much as possible. In particular, there are
-three main places where this is happening, which can be disabled separately through a preference-based system.
+Within MPSKit, when Julia is started with multiple threads, by default the `OhMyThreads.jl`
+machinery will be used to parallelize the code as much as possible. In particular, this mostly
+occurs whenever the state has a nontrivial unit cell, in which case the local updates can
+take place at each site in parallel.
 
-1. During the process of some algorithms (e.g. VUMPS), local updates can take place at each
-   site in parallel. This can be controlled by the `parallelize_sites` preference.
-
-2. During the calculation of the environments, when the MPO is block-sparse, it is possible
-   to parallelize over these blocks. This can be enabled or disabled by the
-   `parallelize_transfers` preference. (Note that left- and right environments will always
-   be computed in parallel)
-
-3. During the calculation of the derivatives, when the MPO is block-sparse, it is possible
-   to parallelize over these blocks. This can be enabled or disabled by the
-   `parallelize_derivatives` preference.
-
-For convenience, these preferences can be set via [`MPSKit.Defaults.set_parallelization`](@ref), which takes as input pairs of preferences and booleans. For example, to disable all parallelization, one can call
+The multithreading behaviour can be controlled through a global `scheduler`, which can be set
+using the `MPSKit.Defaults.set_scheduler!(arg; kwargs...)` function. This function accepts
+either a `Symbol`, an `OhMyThreads.Scheduler` instance, or keyword arguments from which a
+scheduler is determined automatically.
 
 ```julia
-Defaults.set_parallelization("sites" => false, "transfers" => false, "derivatives" => false)
+MPSKit.Defaults.set_scheduler!(:serial) # disable multithreading
+MPSKit.Defaults.set_scheduler!(:greedy) # multithreading with greedy load-balancing
+MPSKit.Defaults.set_scheduler!(:dynamic) # default: multithreading with some load-balancing
```
 
-!!! warning
-    These settings are statically set at compile-time, and for changes to take
-    effect the Julia session must be restarted.
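+
+As an illustrative sketch (assuming only the scheduler types that `OhMyThreads.jl` exports,
+such as `StaticScheduler`), a fully constructed scheduler instance can also be passed directly:
+
+```julia
+using OhMyThreads: StaticScheduler
+
+# statically assign the per-site updates to the available threads, without load-balancing
+MPSKit.Defaults.set_scheduler!(StaticScheduler())
+```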
+For further reference on the available schedulers and finer control, please refer to the
+[`OhMyThreads.jl` documentation](https://juliafolds2.github.io/OhMyThreads.jl/stable/).
 
 ## TensorKit multithreading
 
diff --git a/src/MPSKit.jl b/src/MPSKit.jl
index b17881683..338b43b44 100644
--- a/src/MPSKit.jl
+++ b/src/MPSKit.jl
@@ -17,6 +17,7 @@ using LinearAlgebra: LinearAlgebra
 using Random
 using Base: @kwdef
 using LoggingExtras
+using OhMyThreads
 
 # bells and whistles for mpses
 export InfiniteMPS, FiniteMPS, WindowMPS, MultilineMPS
@@ -159,4 +160,9 @@ include("algorithms/ED.jl")
 
 include("algorithms/unionalg.jl")
 
+function __init__()
+    Defaults.set_scheduler!()
+    return nothing
+end
+
 end
diff --git a/src/algorithms/approximate/vomps.jl b/src/algorithms/approximate/vomps.jl
index 435996c4d..d8d6f5269 100644
--- a/src/algorithms/approximate/vomps.jl
+++ b/src/algorithms/approximate/vomps.jl
@@ -20,21 +20,14 @@ function approximate(ψ::MultilineMPS, toapprox::Tuple{<:MultilineMPO,<:Multilin
                      alg::VOMPS, envs=environments(ψ, toapprox))
     ϵ::Float64 = calc_galerkin(ψ, envs)
     temp_ACs = similar.(ψ.AC)
+    scheduler = Defaults.scheduler[]
 
     log = IterLog("VOMPS")
     LoggingExtras.withlevel(; alg.verbosity) do
         @infov 2 loginit!(log, ϵ)
         for iter in 1:(alg.maxiter)
-            @static if Defaults.parallelize_sites
-                @sync for col in 1:size(ψ, 2)
-                    Threads.@spawn begin
-                        temp_ACs[:, col] = _vomps_localupdate(col, ψ, toapprox, envs)
-                    end
-                end
-            else
-                for col in 1:size(ψ, 2)
-                    temp_ACs[:, col] = _vomps_localupdate(col, ψ, toapprox, envs)
-                end
+            tmap!(eachcol(temp_ACs), 1:size(ψ, 2); scheduler) do col
+                return _vomps_localupdate(col, ψ, toapprox, envs)
             end
 
             alg_gauge = updatetol(alg.alg_gauge, iter, ϵ)
@@ -64,7 +57,10 @@ end
 
 function _vomps_localupdate(loc, ψ, (O, ψ₀), envs, factalg=QRpos())
     local tmp_AC, tmp_C
-    @static if Defaults.parallelize_sites
+    if Defaults.scheduler[] isa SerialScheduler
+        tmp_AC = circshift([ac_proj(row, loc, ψ, envs) for row in 1:size(ψ, 1)], 1)
+        tmp_C = circshift([c_proj(row, loc, ψ, envs) for row in 1:size(ψ, 1)], 1)
+    else
         @sync begin
             Threads.@spawn begin
                 tmp_AC = circshift([ac_proj(row, loc, ψ, envs) for row in 1:size(ψ, 1)], 1)
@@ -73,9 +69,6 @@ function _vomps_localupdate(loc, ψ, (O, ψ₀), envs, factalg=QRpos())
                 tmp_C = circshift([c_proj(row, loc, ψ, envs) for row in 1:size(ψ, 1)], 1)
             end
         end
-    else
-        tmp_AC = circshift([ac_proj(row, loc, ψ, envs) for row in 1:size(ψ, 1)], 1)
-        tmp_C = circshift([c_proj(row, loc, ψ, envs) for row in 1:size(ψ, 1)], 1)
     end
     return regauge!.(tmp_AC, tmp_C; alg=factalg)
 end
diff --git a/src/algorithms/changebonds/svdcut.jl b/src/algorithms/changebonds/svdcut.jl
index 9850e1111..e7cc1d871 100644
--- a/src/algorithms/changebonds/svdcut.jl
+++ b/src/algorithms/changebonds/svdcut.jl
@@ -91,7 +91,7 @@ function changebonds(ψ::InfiniteMPS, alg::SvdCut)
     ψ = if space(ncr, 1) != space(copied[1], 1)
         InfiniteMPS(copied)
     else
-        C₀ = TensorMap(complex(ncr))
+        C₀ = ncr isa TensorMap ? complex(ncr) : TensorMap(complex(ncr))
         InfiniteMPS(copied, C₀)
     end
     return normalize!(ψ)
diff --git a/src/algorithms/derivatives.jl b/src/algorithms/derivatives.jl
index 0108df218..ad562a799 100644
--- a/src/algorithms/derivatives.jl
+++ b/src/algorithms/derivatives.jl
@@ -105,25 +105,6 @@ function ∂AC2(x::Vector, opp1, opp2, leftenv, rightenv)
     return circshift(map(∂AC2, x, opp1, opp2, leftenv, rightenv), 1)
 end
 
-"""
-    Zero-site derivative (the C matrix to the right of pos)
-"""
-function ∂C(x::MPSBondTensor, leftenv::AbstractVector, rightenv::AbstractVector)::typeof(x)
-    if Defaults.parallelize_derivatives
-        @floop WorkStealingEx() for (le, re) in zip(leftenv, rightenv)
-            t = ∂C(x, le, re)
-            @reduce(y = inplace_add!(nothing, t))
-        end
-    else
-        y = ∂C(x, leftenv[1], rightenv[1])
-        for (le, re) in zip(leftenv[2:end], rightenv[2:end])
-            VectorInterface.add!(y, ∂C(x, le, re))
-        end
-    end
-
-    return y
-end
-
 function ∂C(x::MPSBondTensor, leftenv::MPSTensor, rightenv::MPSTensor)
     @plansor y[-1; -2] := leftenv[-1 3; 1] * x[1; 2] * rightenv[2 3; -2]
     return y isa BlockTensorMap ? only(y) : y
diff --git a/src/algorithms/excitation/quasiparticleexcitation.jl b/src/algorithms/excitation/quasiparticleexcitation.jl
index a6158dab7..e93ddf961 100644
--- a/src/algorithms/excitation/quasiparticleexcitation.jl
+++ b/src/algorithms/excitation/quasiparticleexcitation.jl
@@ -247,19 +247,9 @@ function effective_excitation_hamiltonian(H::Union{InfiniteMPOHamiltonian,
                                                    envs.leftenvs, envs.rightenvs))
 
     ϕ′ = similar(ϕ)
-    @static if Defaults.parallelize_sites
-        @sync for loc in 1:length(ϕ)
-            Threads.@spawn begin
-                ϕ′[loc] = _effective_excitation_local_apply(loc, ϕ, H, energy[loc],
-                                                            envs)
-            end
-        end
-    else
-        for loc in 1:length(ϕ)
-            ϕ′[loc] = _effective_excitation_local_apply(loc, ϕ, H, energy[loc], envs)
-        end
+    tforeach(1:length(ϕ); scheduler=Defaults.scheduler[]) do loc
+        return ϕ′[loc] = _effective_excitation_local_apply(loc, ϕ, H, energy[loc], envs)
     end
-
     return ϕ′
 end
diff --git a/src/algorithms/grassmann.jl b/src/algorithms/grassmann.jl
index b8f9f2e3a..b1efe5a42 100644
--- a/src/algorithms/grassmann.jl
+++ b/src/algorithms/grassmann.jl
@@ -12,6 +12,7 @@ module GrassmannMPS
 
 using ..MPSKit
 using TensorKit
+using OhMyThreads
 import TensorKitManifolds.Grassmann
 
 function TensorKit.rmul!(a::Grassmann.GrassmannTangent, b::AbstractTensorMap)
@@ -67,23 +68,38 @@ struct ManifoldPoint{T,E,G,C}
     Rhoreg::C # the regularized density matrices
 end
 
-function ManifoldPoint(state::Union{InfiniteMPS,FiniteMPS}, envs)
+function ManifoldPoint(state::FiniteMPS, envs)
     al_d = similar(state.AL)
     O = envs.operator
     for i in 1:length(state)
         al_d[i] = MPSKit.∂∂AC(i, state, O, envs) * state.AC[i]
     end
-
     g = Grassmann.project.(al_d, state.AL)
 
     Rhoreg = Vector{eltype(state.C)}(undef, length(state))
     δmin = sqrt(eps(real(scalartype(state))))
-    for i in 1:length(state)
-        Rhoreg[i] = regularize(state.C[i], max(norm(g[i]) / 10, δmin))
+    tmap!(Rhoreg, 1:length(state); scheduler=MPSKit.Defaults.scheduler[]) do i
+        return regularize(state.C[i], max(norm(g[i]) / 10, δmin))
     end
 
     return ManifoldPoint(state, envs, g, Rhoreg)
 end
 
+function ManifoldPoint(state::InfiniteMPS, envs)
+    δmin = sqrt(eps(real(scalartype(state))))
+    Tg = Core.Compiler.return_type(Grassmann.project,
+                                   Tuple{eltype(state.AL),eltype(state.AL)})
+    g = similar(state.AL, Tg)
+    ρ = similar(state.C)
+
+    MPSKit.check_recalculate!(envs, state)
+    tforeach(1:length(state); scheduler=MPSKit.Defaults.scheduler[]) do i
+        AC′ = MPSKit.∂∂AC(i, state, envs.operator, envs) * state.AC[i]
+        g[i] = Grassmann.project(AC′, state.AL[i])
+        ρ[i] = regularize(state.C[i], max(norm(g[i]) / 10, δmin))
+        return nothing
+    end
+    return ManifoldPoint(state, envs, g, ρ)
+end
 
 function ManifoldPoint(state::MultilineMPS, envs)
     # FIXME: add support for unitcells
@@ -115,10 +131,10 @@ cell as tangent vectors on Grassmann manifolds.
 """
 function fg(x::ManifoldPoint{T}) where {T<:Union{InfiniteMPS,FiniteMPS}}
     # the gradient I want to return is the preconditioned gradient!
-    g_prec = Vector{PrecGrad{eltype(x.g),eltype(x.Rhoreg)}}(undef, length(x.g))
-
-    for i in 1:length(x.state)
-        g_prec[i] = PrecGrad(rmul!(copy(x.g[i]), x.state.C[i]'), x.Rhoreg[i])
+    Tg = Core.Compiler.return_type(PrecGrad, Tuple{eltype(x.g),eltype(x.Rhoreg)})
+    g_prec = similar(x.g, Tg)
+    tmap!(g_prec, eachindex(x.g); scheduler=MPSKit.Defaults.scheduler[]) do i
+        return PrecGrad(rmul!(copy(x.g[i]), x.state.C[i]'), x.Rhoreg[i])
     end
 
     # TODO: the operator really should not be part of the environments, and this should
@@ -151,10 +167,10 @@ function retract(x::ManifoldPoint{<:MultilineMPS}, tg, alpha)
     g = reshape(tg, size(x.state))
 
     nal = similar(x.state.AL)
-    h = similar(g)
-    for (i, cg) in enumerate(tg)
-        (nal[i], th) = Grassmann.retract(x.state.AL[i], cg.Pg, alpha)
-        h[i] = PrecGrad(th)
+    h = similar(tg)
+    tmap!(h, eachindex(g); scheduler=MPSKit.Defaults.scheduler[]) do i
+        nal[i], th = Grassmann.retract(x.state.AL[i], g[i].Pg, alpha)
+        return PrecGrad(th)
     end
 
     nstate = MPSKit.MultilineMPS(nal, x.state.C[:, end])
@@ -171,9 +187,10 @@ function retract(x::ManifoldPoint{<:InfiniteMPS}, g, alpha)
     envs = x.envs
     nal = similar(state.AL)
     h = similar(g) # The tangent at the end-point
-    for i in 1:length(g)
+
+    tmap!(h, eachindex(g); scheduler=MPSKit.Defaults.scheduler[]) do i
         nal[i], th = Grassmann.retract(state.AL[i], g[i].Pg, alpha)
-        h[i] = PrecGrad(th)
+        return PrecGrad(th)
     end
 
     nstate = InfiniteMPS(nal, state.C[end])
@@ -209,9 +226,9 @@ Transport a tangent vector `h` along the retraction from `x` in direction `g`
 by `alpha`. `xp` is the end-point of the retraction.
 """
 function transport!(h, x, g, alpha, xp)
-    for i in 1:length(h)
-        h[i] = PrecGrad(Grassmann.transport!(h[i].Pg, x.state.AL[i], g[i].Pg, alpha,
-                                             xp.state.AL[i]))
+    tforeach(1:length(h); scheduler=MPSKit.Defaults.scheduler[]) do i
+        return h[i] = PrecGrad(Grassmann.transport!(h[i].Pg, x.state.AL[i], g[i].Pg, alpha,
+                                                    xp.state.AL[i]))
     end
     return h
 end
diff --git a/src/algorithms/groundstate/gradient_grassmann.jl b/src/algorithms/groundstate/gradient_grassmann.jl
index 8ee1e37c9..df0c3d15d 100644
--- a/src/algorithms/groundstate/gradient_grassmann.jl
+++ b/src/algorithms/groundstate/gradient_grassmann.jl
@@ -28,13 +28,17 @@ struct GradientGrassmann{O<:OptimKit.OptimizationAlgorithm,F} <: Algorithm
     finalize!::F
 
     function GradientGrassmann(; method=ConjugateGradient, (finalize!)=OptimKit._finalize!,
-                               tol=Defaults.tol, maxiter=Defaults.maxiter, verbosity=2)
+                               tol=Defaults.tol, maxiter=Defaults.maxiter,
+                               verbosity=Defaults.verbosity - 1)
         if isa(method, OptimKit.OptimizationAlgorithm)
             # We were given an optimisation method, just use it.
            m = method
        elseif method <: OptimKit.OptimizationAlgorithm
            # We were given an optimisation method type, construct an instance of it.
-            m = method(; maxiter=maxiter, verbosity=verbosity, gradtol=tol)
+            # restrict linesearch maxiter
+            linesearch = OptimKit.HagerZhangLineSearch(; verbosity=verbosity - 2,
+                                                       maxiter=100)
+            m = method(; maxiter, verbosity, gradtol=tol, linesearch)
         else
             msg = "method should be either an instance or a subtype of `OptimKit.OptimizationAlgorithm`."
             throw(ArgumentError(msg))
diff --git a/src/algorithms/groundstate/vumps.jl b/src/algorithms/groundstate/vumps.jl
index 0957b841b..7f96bfae3 100644
--- a/src/algorithms/groundstate/vumps.jl
+++ b/src/algorithms/groundstate/vumps.jl
@@ -30,22 +30,15 @@ function find_groundstate(ψ::InfiniteMPS, H, alg::VUMPS, envs=environments(ψ,
     # initialization
     ϵ::Float64 = calc_galerkin(ψ, envs)
     temp_ACs = similar.(ψ.AC)
+    scheduler = Defaults.scheduler[]
 
     log = IterLog("VUMPS")
     LoggingExtras.withlevel(; alg.verbosity) do
         @infov 2 loginit!(log, ϵ, sum(expectation_value(ψ, H, envs)))
         for iter in 1:(alg.maxiter)
             alg_eigsolve = updatetol(alg.alg_eigsolve, iter, ϵ)
-            @static if Defaults.parallelize_sites
-                @sync for loc in 1:length(ψ)
-                    Threads.@spawn begin
-                        temp_ACs[loc] = _vumps_localupdate(loc, ψ, H, envs, alg_eigsolve)
-                    end
-                end
-            else
-                for loc in 1:length(ψ)
-                    temp_ACs[loc] = _vumps_localupdate(loc, ψ, H, envs, alg_eigsolve)
-                end
+            tmap!(temp_ACs, 1:length(ψ); scheduler) do loc
+                return _vumps_localupdate(loc, ψ, H, envs, alg_eigsolve)
             end
 
             alg_gauge = updatetol(alg.alg_gauge, iter, ϵ)
@@ -76,7 +69,10 @@ end
 
 function _vumps_localupdate(loc, ψ, H, envs, eigalg, factalg=QRpos())
     local AC′, C′
-    @static if Defaults.parallelize_sites
+    if Defaults.scheduler[] isa SerialScheduler
+        _, AC′ = fixedpoint(∂∂AC(loc, ψ, H, envs), ψ.AC[loc], :SR, eigalg)
+        _, C′ = fixedpoint(∂∂C(loc, ψ, H, envs), ψ.C[loc], :SR, eigalg)
+    else
         @sync begin
             Threads.@spawn begin
                 _, AC′ = fixedpoint(∂∂AC(loc, ψ, H, envs), ψ.AC[loc], :SR, eigalg)
@@ -85,9 +81,6 @@ function _vumps_localupdate(loc, ψ, H, envs, eigalg, factalg=QRpos())
                 _, C′ = fixedpoint(∂∂C(loc, ψ, H, envs), ψ.C[loc], :SR, eigalg)
             end
         end
-    else
-        _, AC′ = fixedpoint(∂∂AC(loc, ψ, H, envs), ψ.AC[loc], :SR, eigalg)
-        _, C′ = fixedpoint(∂∂C(loc, ψ, H, envs), ψ.C[loc], :SR, eigalg)
     end
     return regauge!(AC′, C′; alg=factalg)
 end
diff --git a/src/algorithms/statmech/vomps.jl b/src/algorithms/statmech/vomps.jl
index e9ad4b9aa..8116e0869 100644
--- a/src/algorithms/statmech/vomps.jl
+++ b/src/algorithms/statmech/vomps.jl
@@ -28,39 +28,16 @@ function leading_boundary(ψ::MultilineMPS, O::MultilineMPO, alg::VOMPS,
                           envs=environments(ψ, O))
     ϵ::Float64 = calc_galerkin(ψ, envs)
     temp_ACs = similar.(ψ.AC)
-    temp_Cs = similar.(ψ.C)
+    scheduler = Defaults.scheduler[]
 
     log = IterLog("VOMPS")
     LoggingExtras.withlevel(; alg.verbosity) do
         @infov 2 loginit!(log, ϵ, expectation_value(ψ, O, envs))
         for iter in 1:(alg.maxiter)
-            @static if Defaults.parallelize_sites
-                @sync for col in 1:size(ψ, 2)
-                    Threads.@spawn begin
-                        H_AC = ∂∂AC(col, ψ, O, envs)
-                        ac = ψ.AC[:, col]
-                        temp_ACs[:, col] .= H_AC(ac)
-                    end
-
-                    Threads.@spawn begin
-                        H_C = ∂∂C(col, ψ, O, envs)
-                        c = ψ.C[:, col]
-                        temp_Cs[:, col] .= H_C(c)
-                    end
-                end
-            else
-                for col in 1:size(ψ, 2)
-                    H_AC = ∂∂AC(col, ψ, O, envs)
-                    ac = ψ.AC[:, col]
-                    temp_ACs[:, col] .= H_AC(ac)
-
-                    H_C = ∂∂C(col, ψ, O, envs)
-                    c = ψ.C[:, col]
-                    temp_Cs[:, col] .= H_C(c)
-                end
+            tmap!(eachcol(temp_ACs), 1:size(ψ, 2); scheduler) do col
+                return _vomps_localupdate(col, ψ, O, envs)
             end
-            regauge!.(temp_ACs, temp_Cs; alg=TensorKit.QRpos())
 
             alg_gauge = updatetol(alg.alg_gauge, iter, ϵ)
             ψ = MultilineMPS(temp_ACs, ψ.C[:, end];
                             alg_gauge.tol, alg_gauge.maxiter)
@@ -85,3 +62,21 @@ function leading_boundary(ψ::MultilineMPS, O::MultilineMPO, alg::VOMPS,
 
     return ψ, envs, ϵ
 end
+
+function _vomps_localupdate(col, ψ::MultilineMPS, O::MultilineMPO, envs, factalg=QRpos())
+    local AC′, C′
+    if Defaults.scheduler[] isa SerialScheduler
+        AC′ = ∂∂AC(col, ψ, O, envs) * ψ.AC[:, col]
+        C′ = ∂∂C(col, ψ, O, envs) * ψ.C[:, col]
+    else
+        @sync begin
+            Threads.@spawn begin
+                AC′ = ∂∂AC(col, ψ, O, envs) * ψ.AC[:, col]
+            end
+            Threads.@spawn begin
+                C′ = ∂∂C(col, ψ, O, envs) * ψ.C[:, col]
+            end
+        end
+    end
+    return regauge!.(AC′, C′; alg=factalg)[:]
+end
diff --git a/src/algorithms/statmech/vumps.jl b/src/algorithms/statmech/vumps.jl
index e6e173fcf..e9baf7d6b 100644
--- a/src/algorithms/statmech/vumps.jl
+++ b/src/algorithms/statmech/vumps.jl
@@ -20,44 +20,16 @@ end
 function leading_boundary(ψ::MultilineMPS, H, alg::VUMPS, envs=environments(ψ, H))
     ϵ::Float64 = calc_galerkin(ψ, envs)
     temp_ACs = similar.(ψ.AC)
-    temp_Cs = similar.(ψ.C)
+    scheduler = Defaults.scheduler[]
 
     log = IterLog("VUMPS")
     LoggingExtras.withlevel(; alg.verbosity) do
         @infov 2 loginit!(log, ϵ, expectation_value(ψ, H, envs))
         for iter in 1:(alg.maxiter)
             alg_eigsolve = updatetol(alg.alg_eigsolve, iter, ϵ)
-            @static if Defaults.parallelize_sites
-                @sync for col in 1:size(ψ, 2)
-                    Threads.@spawn begin
-                        H_AC = ∂∂AC($col, $ψ, $H, $envs)
-                        ac = $ψ.AC[:, col]
-                        _, ac′ = fixedpoint(H_AC, ac, :LM, alg_eigsolve)
-                        $temp_ACs[:, col] = ac′[:]
-                    end
-
-                    Threads.@spawn begin
-                        H_C = ∂∂C($col, $ψ, $H, $envs)
-                        c = $ψ.C[:, col]
-                        _, c′ = fixedpoint(H_C, c, :LM, alg_eigsolve)
-                        $temp_Cs[:, col] = c′[:]
-                    end
-                end
-            else
-                for col in 1:size(ψ, 2)
-                    H_AC = ∂∂AC(col, ψ, H, envs)
-                    ac = ψ.AC[:, col]
-                    _, ac′ = fixedpoint(H_AC, ac, :LM, alg_eigsolve)
-                    temp_ACs[:, col] = ac′[:]
-
-                    H_C = ∂∂C(col, ψ, H, envs)
-                    c = ψ.C[:, col]
-                    _, c′ = fixedpoint(H_C, c, :LM, alg_eigsolve)
-                    temp_Cs[:, col] = c′[:]
-                end
+            tmap!(eachcol(temp_ACs), 1:size(ψ, 2); scheduler) do col
+                return _vumps_localupdate(col, ψ, H, envs, alg_eigsolve)
             end
-
-            regauge!.(temp_ACs, temp_Cs; alg=TensorKit.QRpos())
 
             alg_gauge = updatetol(alg.alg_gauge, iter, ϵ)
             ψ = MultilineMPS(temp_ACs, ψ.C[:, end]; alg_gauge.tol, alg_gauge.maxiter)
@@ -82,3 +54,22 @@ function leading_boundary(ψ::MultilineMPS, H, alg::VUMPS, envs=environments(ψ,
 
     return ψ, envs, ϵ
 end
+
+function _vumps_localupdate(col, ψ::MultilineMPS, O::MultilineMPO, envs, eigalg,
+                            factalg=QRpos())
+    local AC′, C′
+    if Defaults.scheduler[] isa SerialScheduler
+        _, AC′ = fixedpoint(∂∂AC(col, ψ, O, envs), ψ.AC[:, col], :LM, eigalg)
+        _, C′ = fixedpoint(∂∂C(col, ψ, O, envs), ψ.C[:, col], :LM, eigalg)
+    else
+        @sync begin
+            Threads.@spawn begin
+                _, AC′ = fixedpoint(∂∂AC(col, ψ, O, envs), ψ.AC[:, col], :LM, eigalg)
+            end
+            Threads.@spawn begin
+                _, C′ = fixedpoint(∂∂C(col, ψ, O, envs), ψ.C[:, col], :LM, eigalg)
+            end
+        end
+    end
+    return regauge!.(AC′, C′; alg=factalg)[:]
+end
diff --git a/src/algorithms/timestep/tdvp.jl b/src/algorithms/timestep/tdvp.jl
index d4d3adf44..09fe6c8f3 100644
--- a/src/algorithms/timestep/tdvp.jl
+++ b/src/algorithms/timestep/tdvp.jl
@@ -22,35 +22,38 @@ function timestep(ψ::InfiniteMPS, H, t::Number, dt::Number, alg::TDVP,
                   envs::AbstractMPSEnvironments=environments(ψ, H); leftorthflag=true)
     temp_ACs = similar(ψ.AC)
-    temp_CRs = similar(ψ.C)
+    temp_Cs = similar(ψ.C)
 
-    @static if Defaults.parallelize_sites
-        @sync for (loc, (ac, c)) in enumerate(zip(ψ.AC, ψ.C))
+    scheduler = Defaults.scheduler[]
+    if scheduler isa SerialScheduler
+        temp_ACs = tmap!(temp_ACs, 1:length(ψ); scheduler) do loc
+            return integrate(∂∂AC(loc, ψ, H, envs), ψ.AC[loc], t, dt, alg.integrator)
+        end
+        temp_Cs = tmap!(temp_Cs, 1:length(ψ); scheduler) do loc
+            return integrate(∂∂C(loc, ψ, H, envs), ψ.C[loc], t, dt, alg.integrator)
+        end
+    else
+        @sync begin
             Threads.@spawn begin
-                h_ac = ∂∂AC(loc, ψ, H, envs)
-                temp_ACs[loc] = integrate(h_ac, ac, t, dt, alg.integrator)
+                temp_ACs = tmap!(temp_ACs, 1:length(ψ); scheduler) do loc
+                    return integrate(∂∂AC(loc, ψ, H, envs), ψ.AC[loc], t, dt,
+                                     alg.integrator)
+                end
             end
-            Threads.@spawn begin
-                h_c = ∂∂C(loc, ψ, H, envs)
-                temp_CRs[loc] = integrate(h_c, c, t, dt, alg.integrator)
+            temp_Cs = tmap!(temp_Cs, 1:length(ψ); scheduler) do loc
+                return integrate(∂∂C(loc, ψ, H, envs), ψ.C[loc], t, dt, alg.integrator)
+            end
             end
         end
-    else
-        for (loc, (ac, c)) in enumerate(zip(ψ.AC, ψ.C))
-            h_ac = ∂∂AC(loc, ψ, H, envs)
-            temp_ACs[loc] = integrate(h_ac, ac, t, dt, alg.integrator)
-            h_c = ∂∂C(loc, ψ, H, envs)
-            temp_CRs[loc] = integrate(h_c, c, t, dt, alg.integrator)
-        end
     end
 
     if leftorthflag
-        regauge!.(temp_ACs, temp_CRs; alg=TensorKit.QRpos())
+        regauge!.(temp_ACs, temp_Cs; alg=TensorKit.QRpos())
         ψ′ = InfiniteMPS(temp_ACs, ψ.C[end]; tol=alg.tolgauge, maxiter=alg.gaugemaxiter)
     else
-        circshift!(temp_CRs, 1)
-        regauge!.(temp_CRs, temp_ACs; alg=TensorKit.LQpos())
+        circshift!(temp_Cs, 1)
+        regauge!.(temp_Cs, temp_ACs; alg=TensorKit.LQpos())
         ψ′ = InfiniteMPS(ψ.C[0], temp_ACs; tol=alg.tolgauge, maxiter=alg.gaugemaxiter)
     end
 
diff --git a/src/environments/abstract_envs.jl b/src/environments/abstract_envs.jl
index 21965322f..f7ce69df0 100644
--- a/src/environments/abstract_envs.jl
+++ b/src/environments/abstract_envs.jl
@@ -94,7 +94,7 @@ function recalculate!(envs::AbstractInfiniteEnvironments, state; tol=envs.solver
     end
 
     solver = envs.solver
-    envs.solver = solver.tol == tol ? solver : @set solver.tol = tol
+    envs.solver = solver.tol == tol ? solver : Accessors.@set solver.tol = tol
     envs.dependency = state
 
     @sync begin
diff --git a/src/environments/multiple_envs.jl b/src/environments/multiple_envs.jl
index ac4f8a4d4..6eb2a75b3 100644
--- a/src/environments/multiple_envs.jl
+++ b/src/environments/multiple_envs.jl
@@ -59,6 +59,13 @@ function recalculate!(env::MultipleEnvironments, args...; kwargs...)
     return env
 end
 
+function check_recalculate!(env::MultipleEnvironments, args...; kwargs...)
+    for subenv in env.envs
+        check_recalculate!(subenv, args...; kwargs...)
+    end
+    return env
+end
+
 #maybe this can be used to provide compatibility with existing code?
 function Base.getproperty(envs::MultipleEnvironments, prop::Symbol)
     if prop === :solver
diff --git a/src/utility/defaults.jl b/src/utility/defaults.jl
index 4ea6552f4..ecd74e0fb 100644
--- a/src/utility/defaults.jl
+++ b/src/utility/defaults.jl
@@ -5,8 +5,8 @@ Some default values and settings for MPSKit.
 """
 module Defaults
 
-using Preferences
 import KrylovKit: GMRES, Arnoldi, Lanczos
+using OhMyThreads
 using ..MPSKit: DynamicTol
 
 const VERBOSE_NONE = 0
@@ -66,29 +66,30 @@ function alg_expsolve(; tol=tol, maxiter=maxiter, verbosity=0,
         Arnoldi(; tol, maxiter, krylovdim, verbosity)
     end
 
-# Preferences
-# -----------
+"""
+    const scheduler
 
-function set_parallelization(options::Pair{String,Bool}...)
-    for (key, val) in options
-        if !(key in ("sites", "derivatives", "transfers"))
-            throw(ArgumentError("Invalid option: \"$(key)\""))
-        end
+A `Ref` holding the current multi-threading scheduler, typically used to parallelize over unit cells.
+This value is best controlled using [`set_scheduler!`](@ref).
+"""
+const scheduler = Ref{Scheduler}()
 
-        @set_preferences!("parallelize_$key" => val)
-    end
+"""
+    set_scheduler!([scheduler]; kwargs...)
 
-    sites = @load_preference("parallelize_sites", nothing)
-    derivatives = @load_preference("parallelize_derivatives", nothing)
-    transfers = @load_preference("parallelize_transfers", nothing)
-    @info "Parallelization changed; restart your Julia session for this change to take effect!" sites derivatives transfers
-    return nothing
-end
+Set the `OhMyThreads` multi-threading scheduler parameters.
 
-const parallelize_sites = @load_preference("parallelize_sites", Threads.nthreads() > 1)
-const parallelize_derivatives = @load_preference("parallelize_derivatives",
-                                                 Threads.nthreads() > 1)
-const parallelize_transfers = @load_preference("parallelize_transfers",
-                                               Threads.nthreads() > 1)
+The function accepts a `scheduler` either as an `OhMyThreads.Scheduler` instance or as a `Symbol`, in which case the corresponding parameters are specified as keyword arguments.
+For a detailed description of all schedulers and their keyword arguments, consult the [`OhMyThreads` documentation](https://juliafolds2.github.io/OhMyThreads.jl/stable/refs/api/#Schedulers).
+"""
+function set_scheduler!(sc=OhMyThreads.Implementation.NotGiven(); kwargs...)
+    if isempty(kwargs) && sc isa OhMyThreads.Implementation.NotGiven
+        # default value: Serial if single-threaded, Dynamic otherwise
+        scheduler[] = Threads.nthreads() == 1 ? SerialScheduler() : DynamicScheduler()
+    else
+        scheduler[] = OhMyThreads.Implementation._scheduler_from_userinput(sc; kwargs...)
+    end
+    return scheduler[]
+end
 end
diff --git a/src/utility/dynamictols.jl b/src/utility/dynamictols.jl
index 74b2665c5..34c3190d8 100644
--- a/src/utility/dynamictols.jl
+++ b/src/utility/dynamictols.jl
@@ -54,7 +54,7 @@ end
 
 # default implementation with Accessors.jl, but can be hooked into
 function _updatetol(alg, tol::Real)
-    return @set alg.tol = tol
+    return Accessors.@set alg.tol = tol
 end
 
 end
diff --git a/test/algorithms.jl b/test/algorithms.jl
index a596a093a..6d85fa129 100644
--- a/test/algorithms.jl
+++ b/test/algorithms.jl
@@ -188,7 +188,7 @@ end
 
     # test using XXZ model, Δ > 1 is gapped
     spin = 1
-    local_operators = [S_xx(; spin), S_yy(; spin), 0.7 * S_zz(; spin)]
+    local_operators = [S_xx(; spin), S_yy(; spin), 1.7 * S_zz(; spin)]
     Pspace = space(local_operators[1], 1)
     lattice = fill(Pspace, L)
@@ -205,23 +205,24 @@ end
     @testset "DMRG" begin
         # test logging passes
         ψ, envs, δ = find_groundstate(ψ₀, H_lazy,
-                                      DMRG(; tol, verbosity=5, maxiter=1))
+                                      DMRG(; tol, verbosity=verbosity_full, maxiter=1))
 
         # compare states
-        alg = DMRG(; tol, verbosity=1)
-        ψ_lazy, envs, δ = find_groundstate(ψ₀, H_lazy, alg)
+        alg = DMRG(; tol, verbosity=verbosity_conv)
+        ψ, envs, δ = find_groundstate(ψ, H_lazy, alg)
 
-        @test abs(dot(ψ₀, ψ_lazy)) ≈ 1 atol = atol
+        @test abs(dot(ψ₀, ψ)) ≈ 1 atol = atol
     end
 
     @testset "DMRG2" begin
         # test logging passes
         trscheme = truncdim(floor(Int, D * 1.5))
         ψ, envs, δ = find_groundstate(ψ₀, H_lazy,
-                                      DMRG2(; tol, verbosity=5, maxiter=1, trscheme))
+                                      DMRG2(; tol, verbosity=verbosity_full, maxiter=1,
+                                            trscheme))
 
         # compare states
-        alg = DMRG2(; tol, verbosity=1, trscheme)
+        alg = DMRG2(; tol, verbosity=verbosity_conv, trscheme)
         ψ, = find_groundstate(ψ₀, H, alg)
         ψ_lazy, envs, δ = find_groundstate(ψ₀, H_lazy, alg)
 
@@ -231,10 +232,11 @@ end
     @testset "GradientGrassmann" begin
         # test logging passes
         ψ, envs, δ = find_groundstate(ψ₀, H_lazy,
-                                      GradientGrassmann(; tol, verbosity=5, maxiter=2))
+                                      GradientGrassmann(; tol, verbosity=verbosity_full,
+                                                        maxiter=2))
 
         # compare states
-        alg = GradientGrassmann(; tol, verbosity=1)
+        alg = GradientGrassmann(; tol, verbosity=verbosity_conv)
         ψ, = find_groundstate(ψ₀, H, alg)
         ψ_lazy, envs, δ = find_groundstate(ψ₀, H_lazy, alg)
 
@@ -244,12 +246,11 @@ end
 @testset "LazySum InfiniteMPS groundstate" verbose = true begin
     tol = 1e-8
-    D = 15
+    D = 16
     atol = 1e-2
-    # test using XXZ model, Δ > 1 is gapped
     spin = 1
-    local_operators = [S_xx(; spin), S_yy(; spin), S_zz(; spin)]
+    local_operators = [S_xx(; spin), S_yy(; spin), 0.7 * S_zz(; spin)]
     Pspace = space(local_operators[1], 1)
     lattice = PeriodicVector([Pspace])
     mpo_hamiltonians = map(local_operators) do O
@@ -264,24 +265,26 @@ end
 
     @testset "VUMPS" begin
         # test logging passes
-        ψ, envs, δ = find_groundstate(ψ₀, H_lazy, VUMPS(; tol, verbosity=5, maxiter=2))
+        ψ, envs, δ = find_groundstate(ψ₀, H_lazy,
+                                      VUMPS(; tol, verbosity=verbosity_full, maxiter=2))
 
         # compare states
-        alg = VUMPS(; tol, verbosity=2)
-        ψ_lazy, envs, δ = find_groundstate(ψ₀, H_lazy, alg)
+        alg = VUMPS(; tol, verbosity=verbosity_conv)
+        ψ, envs, δ = find_groundstate(ψ, H_lazy, alg)
 
-        @test abs(dot(ψ₀, ψ_lazy)) ≈ 1 atol = atol
+        @test abs(dot(ψ₀, ψ)) ≈ 1 atol = atol
     end
 
     @testset "IDMRG1" begin
         # test logging passes
-        ψ, envs, δ = find_groundstate(ψ₀, H_lazy, IDMRG1(; tol, verbosity=5, maxiter=2))
+        ψ, envs, δ = find_groundstate(ψ₀, H_lazy,
+                                      IDMRG1(; tol, verbosity=verbosity_full, maxiter=2))
 
         # compare states
-        alg = IDMRG1(; tol, verbosity=2)
-        ψ_lazy, envs, δ = find_groundstate(ψ₀, H_lazy, alg)
+        alg = IDMRG1(; tol, verbosity=verbosity_conv, maxiter=300)
+        ψ, envs, δ = find_groundstate(ψ, H_lazy, alg)
 
-        @test abs(dot(ψ₀, ψ_lazy)) ≈ 1 atol = atol
+        @test abs(dot(ψ₀, ψ)) ≈ 1 atol = atol
     end
 
     @testset "IDMRG2" begin
@@ -292,26 +295,27 @@ end
         trscheme = truncdim(floor(Int, D * 1.5))
         # test logging passes
         ψ, envs, δ = find_groundstate(ψ₀′, H_lazy′,
-                                      IDMRG2(; tol, verbosity=5, maxiter=2, trscheme))
+                                      IDMRG2(; tol, verbosity=verbosity_full, maxiter=2,
+                                            trscheme))
 
         # compare states
-        alg = IDMRG2(; tol, verbosity=2, trscheme)
-        ψ_lazy, envs, δ = find_groundstate(ψ₀′, H_lazy′, alg)
+        alg = IDMRG2(; tol, verbosity=verbosity_conv, trscheme)
+        ψ, envs, δ = find_groundstate(ψ, H_lazy′, alg)
 
-        @test abs(dot(ψ₀′, ψ_lazy)) ≈ 1 atol = atol
+        @test abs(dot(ψ₀′, ψ)) ≈ 1 atol = atol
     end
 
     @testset "GradientGrassmann" begin
         # test logging passes
         ψ, envs, δ = find_groundstate(ψ₀, H_lazy,
-                                      GradientGrassmann(; tol, verbosity=5, maxiter=2))
+                                      GradientGrassmann(; tol, verbosity=verbosity_full,
+                                                        maxiter=2))
 
         # compare states
-        alg = GradientGrassmann(; tol, verbosity=1)
-        ψ_lazy, envs, δ = find_groundstate(ψ₀, H_lazy, alg)
-        ψ, = find_groundstate(ψ₀, H, alg)
+        alg = GradientGrassmann(; tol, verbosity=verbosity_conv)
+        ψ, envs, δ = find_groundstate(ψ₀, H_lazy, alg)
 
-        @test abs(dot(ψ₀, ψ_lazy)) ≈ 1 atol = atol
+        @test abs(dot(ψ₀, ψ)) ≈ 1 atol = atol
    end
 end
@@ -408,9 +412,9 @@ end
 
 @testset "leading_boundary" verbose = true begin
     tol = 1e-4
-    verbosity = 0
+    verbosity = verbosity_conv
     algs = [VUMPS(; tol, verbosity), VOMPS(; tol, verbosity),
-            GradientGrassmann(; verbosity)]
+            GradientGrassmann(; tol, verbosity)]
 
     mpo = force_planar(classical_ising())
     ψ₀ = InfiniteMPS([ℙ^2], [ℙ^10])
@@ -428,7 +432,8 @@ end
     @testset "infinite (ham)" begin
         H = repeat(force_planar(heisenberg_XXX()), 2)
         ψ = InfiniteMPS([ℙ^3, ℙ^3], [ℙ^48, ℙ^48])
-        ψ, envs, _ = find_groundstate(ψ, H; maxiter=400, verbosity=0, tol=1e-11)
+        ψ, envs, _ = find_groundstate(ψ, H; maxiter=400, verbosity=verbosity_conv,
+                                      tol=1e-10)
         energies, ϕs = excitations(H, QuasiparticleAnsatz(), Float64(pi), ψ, envs)
         @test energies[1] ≈ 0.41047925 atol = 1e-4
         @test variance(ϕs[1], H) < 1e-8
     end
     @testset "infinite (mpo)" begin
         H = repeat(sixvertex(), 2)
         ψ = InfiniteMPS([ℂ^2, ℂ^2], [ℂ^10, ℂ^10])
-        ψ, envs, _ = leading_boundary(ψ, H, VUMPS(; maxiter=400, verbosity=0))
+        ψ, envs, _ = leading_boundary(ψ, H,
+                                      VUMPS(; maxiter=400, verbosity=verbosity_conv,
+                                            tol=1e-10))
         energies, ϕs = excitations(H, QuasiparticleAnsatz(), [0.0, Float64(pi / 2)], ψ,
                                    envs; verbosity=0)
         @test abs(energies[1]) > abs(energies[2]) # has a minimum at pi/2
     end
     @testset "finite" begin
-        verbosity = 0
+        verbosity = verbosity_conv
         H_inf = force_planar(transverse_field_ising())
         ψ_inf = InfiniteMPS([ℙ^2], [ℙ^10])
         ψ_inf, envs, _ = find_groundstate(ψ_inf, H_inf; maxiter=400, verbosity, tol=1e-9)
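
Note for reviewers: the following is a minimal standalone sketch of the `OhMyThreads.tmap!` pattern adopted throughout this PR; `local_update`, `sites`, and `out` are illustrative placeholders, not MPSKit API.

```julia
using OhMyThreads: tmap!, SerialScheduler, DynamicScheduler

# illustrative stand-in for a per-site local update
local_update(site) = site^2

sites = collect(1:4)
out = similar(sites)

# SerialScheduler: all updates run in the calling task, with no threading overhead
tmap!(local_update, out, sites; scheduler=SerialScheduler())

# DynamicScheduler: load-balanced multithreading, the default chosen by
# `Defaults.set_scheduler!()` when `Threads.nthreads() > 1`
tmap!(local_update, out, sites; scheduler=DynamicScheduler())
```

This mirrors how the algorithm loops above replace the hand-rolled `@static if ... Threads.@spawn` branches with a single scheduler-driven call.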