Skip to content

Commit a946e2e

Browse files
committed
Performance improvements
- got rid of all allocations in calc_chigamma_impr - improved performance of threaded version - loops reordered
1 parent 7cc4ced commit a946e2e

File tree

3 files changed

+38
-31
lines changed

3 files changed

+38
-31
lines changed

Manifest.toml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
julia_version = "1.11.5"
44
manifest_format = "2.0"
5-
project_hash = "9ea45be61937a755ee6757eec725af4882f38692"
5+
project_hash = "17cb083765589e55555c6f9f8a12f6c841814a84"
66

77
[[deps.ANSIColoredPrinters]]
88
git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c"
@@ -126,11 +126,11 @@ version = "1.11.0"
126126

127127
[[deps.BSE_Asymptotics]]
128128
deps = ["ColorSchemes", "Documenter", "JLD2", "LinearAlgebra", "OffsetArrays", "Test", "TimerOutputs"]
129-
git-tree-sha1 = "7bda0c84f387e2ef73e0523b25cb144346389396"
129+
git-tree-sha1 = "df23f5ec4ec8bc5456e1734ca539954237ffffa0"
130130
repo-rev = "master"
131131
repo-url = "https://github.com/Atomtomate/BSE_Asymptotics.jl"
132132
uuid = "646ec391-26e4-4808-a965-02314d3a2269"
133-
version = "3.0.2"
133+
version = "3.3.0"
134134

135135
[[deps.Base64]]
136136
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
@@ -309,9 +309,9 @@ version = "0.9.4"
309309

310310
[[deps.Documenter]]
311311
deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "CodecZlib", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "TOML", "Test", "Unicode"]
312-
git-tree-sha1 = "9d733459cea04dcf1c41522ec25c31576387be8a"
312+
git-tree-sha1 = "b7af952d4701252dc45d3b0025693e9cb4dedcd8"
313313
uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
314-
version = "1.10.1"
314+
version = "1.10.2"
315315

316316
[[deps.DocumenterTools]]
317317
deps = ["AbstractTrees", "Base64", "DocStringExtensions", "Documenter", "FileWatching", "Git", "Gumbo", "LibGit2", "OpenSSH_jll", "Sass"]
@@ -749,9 +749,9 @@ version = "0.12.3"
749749

750750
[[deps.Parsers]]
751751
deps = ["Dates", "PrecompileTools", "UUIDs"]
752-
git-tree-sha1 = "44f6c1f38f77cafef9450ff93946c53bd9ca16ff"
752+
git-tree-sha1 = "7d2f8f21da5db6a806faf7b9b292296da42b2810"
753753
uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
754-
version = "2.8.2"
754+
version = "2.8.3"
755755

756756
[[deps.Pkg]]
757757
deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "p7zip_jll"]

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name = "LadderDGA"
22
uuid = "78e40beb-bf89-4c0e-9d2b-bee278912f2b"
33
authors = ["Julian Stobbe <Atomtomate@gmx.de> and contributors"]
44

5-
version = "1.6.0"
5+
version = "1.7.0"
66

77
[deps]
88
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
@@ -36,7 +36,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
3636
TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
3737

3838
[compat]
39-
BSE_Asymptotics = "3.0.2"
39+
BSE_Asymptotics = "3.3.0"
4040
DataFrames = "1.7.0"
4141
Dispersions = "1.0.1"
4242
EquivalenceClassesConstructor = "0.5.5"

src/BSETools.jl

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -214,14 +214,15 @@ This method solves the following equation:
214214
``
215215
"""
216216
function calc_χγ(type::Symbol, h::Union{lDΓAHelper,AlDΓAHelper}, χ₀::χ₀T; verbose=true, ω_symmetric::Bool=false, use_threads=false)
217-
calc_χγ(type, getfield(h, Symbol("Γ_$(type)")), χ₀, h.kG, h.mP, h.sP, verbose=verbose, ω_symmetric=ω_symmetric, use_threads=use_threads)
217+
calc_χγ(type, getfield(h, Symbol("Γ_$(type)")), χ₀, h.kG, h.mP, h.sP; verbose=verbose, ω_symmetric=ω_symmetric, use_threads=use_threads)
218218
end
219219

220-
function calc_χγ(type::Symbol, Γr::ΓT, χ₀::χ₀T, kG::KGrid, mP::ModelParameters, sP::SimulationParameters; verbose=true, ω_symmetric::Bool=false, use_threads=false)
220+
function calc_χγ(type::Symbol, Γr::ΓT, χ₀::χ₀T, kG::KGrid, mP::ModelParameters, sP::SimulationParameters;
221+
verbose=true, ω_symmetric::Bool=false, use_threads=false)
221222
s = type == :d ? -1 : 1
222223
!(type in (:d, :m)) && error("Unkown type $type")
223224

224-
NT = Threads.nthreads()
225+
225226
Nν = 2 * sP.n_iν
226227
Nq = length(kG.kMult)
227228
Nω = size(χ₀.data, χ₀.axis_types[:ω])
@@ -232,31 +233,36 @@ function calc_χγ(type::Symbol, Γr::ΓT, χ₀::χ₀T, kG::KGrid, mP::ModelPa
232233
qi_range = 1:Nq
233234
χ_ω = Vector{Float64}(undef, Nω)
234235

235-
236236
if use_threads
237+
bthreads = BLAS.get_num_threads()
238+
BLAS.set_num_threads(1)
239+
NT = Threads.nthreads()
237240
χννpω = [Matrix{ComplexF64}(undef, Nν, Nν) for ti in 1:NT]
238241
ipiv = [Vector{Int}(undef, Nν) for ti in 1:NT]
239-
work = [_gen_inv_work_arr(χννpω[1], ipiv[1]) for ti in 1:NT]
242+
work = [_gen_inv_work_arr(χννpω[Threads.threadid()], ipiv[Threads.threadid()]) for ti in 1:NT]
240243
λ_cache = [Vector{ComplexF64}(undef, Nν) for ti in 1:NT]
244+
diag_cache = [similar(sP.χ_helper.diag_asym_buffer) for ti in 1:NT]
241245
Threads.@threads for qi in qi_range
242246
for ωm in ωm_range
243247
ωi = ωm + sP.n_iω + 1
244248
invert_BSE!(χ, γ, χννpω[Threads.threadid()], ipiv[Threads.threadid()], work[Threads.threadid()],
245-
sP.χ_helper, λ_cache[Threads.threadid()], type, ω_symmetric, s, Γr, χ₀.data, χ₀.asym,
249+
sP.χ_helper, λ_cache[Threads.threadid()], diag_cache[Threads.threadid()], type, ω_symmetric, s, Γr, χ₀.data, χ₀.asym,
246250
χ₀.ν_shell_size, qi, sP.n_iω, ωm, ωi, mP.U, mP.β)
247251
end
248252
end
253+
BLAS.set_num_threads(bthreads)
249254
else
250255
χννpω = Matrix{ComplexF64}(undef, Nν, Nν)
251256
ipiv = Vector{Int}(undef, Nν)
252257
work = _gen_inv_work_arr(χννpω, ipiv)
253258
λ_cache = Vector{ComplexF64}(undef, Nν)
254259

255-
for qi in qi_range
256-
for ωm in ωm_range
257-
ωi = ωm + sP.n_iω + 1
258-
invert_BSE!(χ, γ, χννpω, ipiv, work, sP.χ_helper, λ_cache, type, ω_symmetric, s, Γr, χ₀.data, χ₀.asym,
259-
χ₀.ν_shell_size, qi, sP.n_iω, ωm, ωi, mP.U, mP.β)
260+
for ωm in ωm_range
261+
ωi = ωm + sP.n_iω + 1
262+
for qi in qi_range
263+
invert_BSE!(χ, γ, χννpω, ipiv, work,
264+
sP.χ_helper, λ_cache, sP.χ_helper.diag_asym_buffer, type, ω_symmetric, s, Γr, χ₀.data, χ₀.asym,
265+
χ₀.ν_shell_size, qi, sP.n_iω, ωm, ωi, mP.U, mP.β)
260266
end
261267
end
262268
end
@@ -272,23 +278,25 @@ function calc_χγ(type::Symbol, Γr::ΓT, χ₀::χ₀T, kG::KGrid, mP::ModelPa
272278
end
273279

274280
function invert_BSE!(χ::AbstractArray{Float64,2}, γ::AbstractArray{ComplexF64,3}, χννpω::AbstractArray{ComplexF64,2},
275-
ipiv::Vector{Int}, work::Vector{ComplexF64}, χ_helper, λ_cache, type::Symbol, ω_symmetric::Bool, s::Int,
281+
ipiv::Vector{Int}, work::Vector{ComplexF64}, χ_helper::BSE_Asym_Helper, λ_cache::Vector{ComplexF64}, diag_cache::Vector{ComplexF64},
282+
type::Symbol, ω_symmetric::Bool, s::Int,
276283
Γr::AbstractArray{ComplexF64,3}, χ₀_data::AbstractArray{ComplexF64,3}, χ₀_asym::AbstractArray{ComplexF64,2},
277284
ν_shell_size::Int, qi::Int, ωm_max::Int, ωm::Int, ωi::Int, U::Float64, β::Float64)
278-
χννpω[:, :] = deepcopy(Γr[:, :, ωi])
285+
286+
copy!(χννpω, view(Γr,:,:, ωi))
279287
for l in axes(χννpω, 1)
280-
χννpω[l, l] += 1.0 / χ₀_data[qi, ν_shell_size+l, ωi]
288+
@inbounds χννpω[l, l] += 1.0 / χ₀_data[qi, ν_shell_size+l, ωi]
281289
end
282-
283290
inv!(χννpω, ipiv, work)
284291
if typeof(χ_helper) <: BSE_Asym_Helpers
285292
χ[qi, ωi] = real(
286-
calc_χλ_impr!(λ_cache, type, ωm, χννpω,
287-
view(χ₀_data, qi, :, ωi), U, β,
288-
χ₀_asym[qi, ωi], χ_helper,
289-
),
290-
)
291-
γ[qi, :, ωi] = (1 .- s * λ_cache) ./ (1 .+ s * U .* χ[qi, ωi])
293+
calc_χλ_impr!(λ_cache, diag_cache, type, qi, ωi, ωm, χννpω,
294+
χ₀_data, U, β,
295+
χ₀_asym[qi, ωi], χ_helper,
296+
))
297+
for νi in axes(γ,2)
298+
@inbounds γ[qi, νi, ωi] = (1 - s * λ_cache[νi]) / (1 + s * U * χ[qi, ωi])
299+
end
292300
else
293301
if typeof(χ_helper) === BSE_SC_Helper
294302
improve_χ!(type, ωi, view(χννpω, :, :, ωi), view(χ₀, qi, :, ωi), U, β, χ_helper)
@@ -306,7 +314,6 @@ function invert_BSE!(χ::AbstractArray{Float64,2}, γ::AbstractArray{ComplexF64,
306314
end
307315

308316

309-
310317
"""
311318
calc_gen_χ(Γr::ΓT, χ₀::χ₀T, kG::KGrid)
312319

0 commit comments

Comments
 (0)