Fix DFPT wrt temperature (#1156)

antoine-levitt · niklasschmitz · commit 4147fe6fbb54 · 2025-10-09T17:29:52.000+02:00
diff --git a/examples/polarizability.jl b/examples/polarizability.jl
@@ -105,7 +105,7 @@ println("Polarizability :   $polarizability")
 
 ## Multiply δVext times the Bloch waves, then solve the Dyson equation:
 δVψ = DFTK.multiply_ψ_by_blochwave(scfres.basis, scfres.ψ, δVext)
-res = DFTK.solve_ΩplusK_split(scfres, -δVψ; verbose=true)
+res = DFTK.solve_ΩplusK_split(scfres, δVψ; verbose=true)
 
 # From the result of `solve_ΩplusK_split` we can easily compute the polarisabilities:
 
diff --git a/src/Model.jl b/src/Model.jl
@@ -92,24 +92,27 @@ If you want to pass custom symmetry operations (e.g. a reduced or extended set)
 external potential breaks some of the passed symmetries. Use `false` to turn off
 symmetries completely.
 """
-function Model(lattice::AbstractMatrix{T},
+function Model(lattice::AbstractMatrix{Tstatic},
                atoms::Vector{<:Element}=Element[],
-               positions::Vector{<:AbstractVector}=Vec3{T}[];
+               positions::Vector{<:AbstractVector}=Vec3{Tstatic}[];
                model_name="custom",
                εF=nothing,
                n_electrons::Union{Int,Nothing}=isnothing(εF) ?
                                                n_electrons_from_atoms(atoms) : nothing,
                # Force electrostatics with non-neutral cells; results not guaranteed.
                # Set to `true` by default for charged systems.
                disable_electrostatics_check=all(iszero, charge_ionic.(atoms)),
-               magnetic_moments=T[],
+               magnetic_moments=Tstatic[],
                terms=[Kinetic()],
-               temperature=zero(T),
+               temperature=zero(Tstatic),
                smearing=temperature > 0 ? Smearing.FermiDirac() : Smearing.None(),
                spin_polarization=determine_spin_polarization(magnetic_moments),
                symmetries=default_symmetries(lattice, atoms, positions, magnetic_moments,
                                              spin_polarization, terms),
-               ) where {T <: Real}
+               ) where {Tstatic <: Real}
+    # A bit convoluted: keyword arguments cannot bind type parameters, so promote manually.
+    T = promote_type(Tstatic, typeof(temperature))
+
     # Validate εF and n_electrons
     if !isnothing(εF)  # fixed Fermi level
         if !isnothing(n_electrons)
@@ -250,9 +253,9 @@ function Model{T}(model::Model;
     Model(T.(lattice), atoms, positions;
           model.model_name,
           model.n_electrons,
-          magnetic_moments,
+          magnetic_moments=T.(magnetic_moments),
           terms=model.term_types,
-          model.temperature,
+          temperature=T(model.temperature),
           model.smearing,
           model.εF,
           model.spin_polarization,
diff --git a/src/Smearing.jl b/src/Smearing.jl
@@ -83,7 +83,13 @@ function xlogx(x)
 end
 function entropy(S::FermiDirac, x)
     f = occupation(S, x)
-    - (xlogx(f) + xlogx(1 - f))
+    # Protect against the occupation being (numerically) zero or one, which causes trouble with the derivative.
+    # This check looks clumsy, but if we just check for f == 0, the branch won't get picked up by ForwardDiff.
+    if abs(f) < eps(typeof(x))|| abs(1-f) < eps(typeof(x))
+        zero(x)
+    else
+        - (xlogx(f) + xlogx(1 - f))
+    end
 end
 function occupation_divided_difference(S::FermiDirac, x, y, εF, temp)
     temp == 0 && return occupation_divided_difference(None(), x, y, εF, temp)
diff --git a/src/postprocess/phonon.jl b/src/postprocess/phonon.jl
@@ -82,7 +82,7 @@ in reduced coordinates.
         isnothing(δHψs_αs) && continue
         # Response solver to get δψ
         (; δψ, δρ, δoccupation) = solve_ΩplusK_split(ham, ρ, ψ, occupation, εF, eigenvalues,
-                                                     -δHψs_αs; q, kwargs...)
+                                                     δHψs_αs; q, kwargs...)
         δoccupations[α, s] = δoccupation
         δρs[α, s] = δρ
         δψs[α, s] = δψ
diff --git a/src/postprocess/refine.jl b/src/postprocess/refine.jl
@@ -150,7 +150,7 @@ function refine_scfres(scfres, basis_ref::PlaneWaveBasis{T};
     ΩpKe2 = apply_Ω(e2, ψr, hamr, Λ) .+ apply_K(basis_ref, e2, ψr, ρr, occ)
     ΩpKe2 = transfer_blochwave(ΩpKe2, basis_ref, basis)
 
-    rhs = resLF - ΩpKe2
+    rhs = ΩpKe2 - resLF
 
     # Invert Ω+K on the small space
     ΩpK_res = solve_ΩplusK(basis, ψ, rhs, occ; tol, kwargs...)
diff --git a/src/response/chi0.jl b/src/response/chi0.jl
@@ -275,36 +275,37 @@ The derivatives of the occupations are in-place stored in δocc.
 The tuple (; δocc, δεF) is returned. It is assumed the passed `δocc`
 are initialised to zero.
 """
-function compute_δocc!(δocc, basis::PlaneWaveBasis{T}, ψ, εF, ε, δHψ) where {T}
+function compute_δocc!(δocc, basis::PlaneWaveBasis{T}, ψ, εF, ε, δHψ, δtemperature) where {T}
     model = basis.model
     temperature = model.temperature
     smearing = model.smearing
     filled_occ = filled_occupation(model)
 
-    # δocc = fn' * (δεn - δεF)
+    # compute the derivative of
+    # occ[k][n] = filled_occ*occupation((εnk-εF)/T)
     δεF = zero(T)
     if !is_effective_insulator(basis, ε, εF; smearing, temperature)
         # First compute δocc without self-consistent Fermi δεF.
         D = zero(T)
         for ik = 1:length(basis.kpoints), (n, εnk) in enumerate(ε[ik])
-            enred = (εnk - εF) / temperature
             δεnk = real(dot(ψ[ik][:, n], δHψ[ik][:, n]))
-            fpnk = filled_occ * Smearing.occupation_derivative(smearing, enred) / temperature
-            δocc[ik][n] = δεnk * fpnk
-            D += fpnk * basis.kweights[ik]
+            εnkred = (εnk - εF) / temperature
+            δεnkred = δεnk/temperature - εnkred*δtemperature/temperature
+            fpnk = filled_occ * Smearing.occupation_derivative(smearing, εnkred)
+            δocc[ik][n] = fpnk * δεnkred
+            D -= fpnk * basis.kweights[ik] / temperature  # while we're at it, accumulate the total DOS D
         end
-        D = mpi_sum(D, basis.comm_kpts)  # equal to minus the total DOS
+        D = mpi_sum(D, basis.comm_kpts)
 
         if isnothing(model.εF)  # εF === nothing means that Fermi level is fixed by model
-            # Compute δεF…
+            # Compute δεF from δ ∑ occ = 0…
             δocc_tot = mpi_sum(sum(basis.kweights .* sum.(δocc)), basis.comm_kpts)
-            δεF = δocc_tot / D
+            δεF = -δocc_tot / D
 
-            # … and recompute δocc, taking into account δεF.
+            # … and add the corresponding contribution to δocc
             for ik = 1:length(basis.kpoints), (n, εnk) in enumerate(ε[ik])
-                enred = (εnk - εF) / temperature
-                fpnk = filled_occ * Smearing.occupation_derivative(smearing, enred) / temperature
-                δocc[ik][n] -= fpnk * δεF
+                fpnk = filled_occ * Smearing.occupation_derivative(smearing, (εnk - εF) / temperature)
+                δocc[ik][n] -= fpnk * δεF / temperature
             end
         end
     end
@@ -396,6 +397,7 @@ Compute the orbital and occupation changes as a result of applying the ``χ_0``
 to the Hamiltonian change `δH` represented by the matrix-vector products `δHψ`. 
 """
 @views @timing function apply_χ0_4P(ham, ψ, occupation, εF, eigenvalues, δHψ;
+                                    δtemperature=zero(eltype(ham.basis)),
                                     occupation_threshold, q=zero(Vec3{eltype(ham.basis)}),
                                     bandtolalg, tol=1e-9, kwargs_sternheimer...)
     basis = ham.basis
@@ -436,10 +438,11 @@ to the Hamiltonian change `δH` represented by the matrix-vector products `δHψ
     δoccupation = zero.(occupation)
     if iszero(q)
         δocc_occ = [δoccupation[ik][maskk] for (ik, maskk) in enumerate(mask_occ)]
-        (; δεF) = compute_δocc!(δocc_occ, basis, ψ_occ, εF, ε_occ, δHψ_minus_q_occ)
+        (; δεF) = compute_δocc!(δocc_occ, basis, ψ_occ, εF, ε_occ, δHψ_minus_q_occ, δtemperature)
     else
         # When δH is not periodic, δH ψnk is a Bloch wave at k+q and ψnk at k,
         # so that δεnk = <ψnk|δH|ψnk> = 0 and there is no occupation shift
+        @assert δtemperature == 0 # TODO think about this
         δεF = zero(εF)
     end
 
@@ -466,6 +469,7 @@ Parameters:
 - `maxiter`: Maximal number of CG iterations per k and band for Sternheimer
 """
 function apply_χ0(ham, ψ, occupation, εF::T, eigenvalues, δV::AbstractArray{TδV};
+                  δtemperature=zero(eltype(ham.basis)),
                   occupation_threshold=default_occupation_threshold(TδV),
                   q=zero(Vec3{eltype(ham.basis)}),
                   bandtolalg=BandtolBalanced(ham.basis, ψ, occupation; occupation_threshold),
@@ -493,7 +497,7 @@ function apply_χ0(ham, ψ, occupation, εF::T, eigenvalues, δV::AbstractArray{
     #   δHψ_k = δV_{q} · ψ_{k-q}.
     δHψ = multiply_ψ_by_blochwave(basis, ψ, δV, q)
     res = apply_χ0_4P(ham, ψ, occupation, εF, eigenvalues, δHψ;
-                      occupation_threshold, q, bandtolalg,
+                      δtemperature, occupation_threshold, q, bandtolalg,
                       kwargs_sternheimer...)
 
     δρ = compute_δρ(basis, ψ, res.δψ, occupation, res.δoccupation; occupation_threshold, q)
diff --git a/src/response/hessian.jl b/src/response/hessian.jl
@@ -103,9 +103,9 @@ end
 
 """
 Solve density-functional perturbation theory problem,
-that is return δψ where (Ω+K) δψ = rhs.
+that is return δψ where (Ω+K) δψ = -δHextψ.
 """
-@timing function solve_ΩplusK(basis::PlaneWaveBasis{T}, ψ, rhs, occupation;
+@timing function solve_ΩplusK(basis::PlaneWaveBasis{T}, ψ, δHextψ, occupation;
                               callback=ResponseCallback(), tol=1e-10) where {T}
     # for now, all orbitals have to be fully occupied -> need to strip them beforehand
     check_full_occupation(basis, occupation)
@@ -118,9 +118,9 @@ that is return δψ where (Ω+K) δψ = rhs.
     unpack(x) = unpack_ψ(reinterpret_complex(x), size.(ψ))
     unsafe_unpack(x) = unsafe_unpack_ψ(reinterpret_complex(x), size.(ψ))
 
-    # project rhs on the tangent space before starting
-    proj_tangent!(rhs, ψ)
-    rhs_pack = pack(rhs)
+    # project δHextψ on the tangent space before starting
+    proj_tangent!(δHextψ, ψ)
+    δHextψ_pack = pack(δHextψ)
 
     # preconditioner
     Pks = [PreconditionerTPA(basis, kpt) for kpt in basis.kpoints]
@@ -145,15 +145,15 @@ that is return δψ where (Ω+K) δψ = rhs.
         Ωδψ = apply_Ω(δψ, ψ, H, Λ)
         pack(Ωδψ + Kδψ)
     end
-    J = LinearMap{T}(ΩpK, size(rhs_pack, 1))
+    J = LinearMap{T}(ΩpK, size(δHextψ_pack, 1))
 
-    # solve (Ω+K) δψ = rhs on the tangent space with CG
+    # solve (Ω+K) δψ = -δHextψ on the tangent space with CG
     function proj(x)
         δψ = unpack(x)
         proj_tangent!(δψ, ψ)
         pack(δψ)
     end
-    res = cg(J, rhs_pack; precon=FunctionPreconditioner(f_ldiv!), proj, tol,
+    res = cg(J, -δHextψ_pack; precon=FunctionPreconditioner(f_ldiv!), proj, tol,
              callback, comm=basis.comm_kpts)
     (; δψ=unpack(res.x), res.converged, res.tol, res.residual_norm,
      res.n_iter)
@@ -219,10 +219,10 @@ function (cb::OmegaPlusKDefaultCallback)(info)
 end
 
 """
-Solve the problem `(Ω+K) δψ = rhs` (density-functional perturbation theory)
-using a split algorithm, where `rhs` is typically
-`-δHextψ` (the negative matvec of an external perturbation with the SCF orbitals `ψ`) and
-`δψ` is the corresponding total variation in the orbitals `ψ`. Additionally returns:
+Solve the problem `(Ω+K) δψ = -δHextψ` (density-functional perturbation theory)
+using a split algorithm, where `δHextψ` is the external perturbation `δHext` applied to the SCF orbitals `ψ`
+and `δψ` is the corresponding total variation in the orbitals `ψ`.
+Additionally returns:
     - `δρ`:  Total variation in density
-    - `δHψ`: Total variation in Hamiltonian applied to orbitals
+    - `δHtotψ`: Total variation in Hamiltonian applied to orbitals
     - `δeigenvalues`: Total variation in eigenvalues
@@ -243,7 +243,8 @@ Input parameters:
    see [arxiv 2505.02319](https://arxiv.org/pdf/2505.02319) for more details.
 """
 @timing function solve_ΩplusK_split(ham::Hamiltonian, ρ::AbstractArray{T}, ψ, occupation, εF,
-                                    eigenvalues, rhs;
+                                    eigenvalues, δHextψ;
+                                    δtemperature=zero(real(T)),
                                     tol=1e-8, verbose=true,
                                     mixing=SimpleMixing(),
                                     occupation_threshold,
@@ -268,7 +269,7 @@ Input parameters:
     #          =  χ04P (-1 + E K2P (1 - χ02P K2P)^-1 R (-χ04P))
     # where χ02P = R χ04P E and K2P = R K E
     basis = ham.basis
-    @assert size(rhs[1]) == size(ψ[1])  # Assume the same number of bands in ψ and rhs
+    @assert size(δHextψ[1]) == size(ψ[1])
     start_ns = time_ns()
 
     # TODO Better initial guess handling. Especially between the last iteration of the GMRES
@@ -281,10 +282,11 @@ Input parameters:
 
     # compute δρ0 (ignoring interactions)
     δρ0 = let  # Make sure memory owned by res0 is freed
-        res0 = apply_χ0_4P(ham, ψ, occupation, εF, eigenvalues, -rhs;
+        res0 = apply_χ0_4P(ham, ψ, occupation, εF, eigenvalues, δHextψ;
+                           δtemperature,
                            maxiter=maxiter_sternheimer, tol=tol * factor_initial,
                            bandtolalg, occupation_threshold,
-                           q, kwargs...)  # = -χ04P * rhs
+                           q, kwargs...)  # = χ04P * δHext
         callback((; stage=:noninteracting, runtime_ns=time_ns() - start_ns,
                     Axinfos=[(; basis, tol=tol*factor_initial, res0...)]))
         compute_δρ(basis, ψ, res0.δψ, occupation, res0.δoccupation;
@@ -308,42 +310,46 @@ Input parameters:
         @warn "Solve_ΩplusK_split solver not converged"
     end
 
-    # Compute total change in Hamiltonian applied to ψ
+    # We now have δρ, but we are not done yet: we want the full output of the four-point apply_χ0_4P,
+    # so we call apply_χ0_4P once more, this time on the total perturbation.
+
+    # Induced potential variation
     δVind = apply_kernel(basis, δρ; ρ, q)  # Change in potential induced by δρ
 
+    # Total variation δHtot ψ
     # For phonon calculations, assemble
     #   δHψ_k = δV_{q} · ψ_{k-q}.
-    δHψ = multiply_ψ_by_blochwave(basis, ψ, δVind, q) .- rhs
-
-    # Compute total change in eigenvalues
-    δeigenvalues = map(ψ, δHψ) do ψk, δHψk
-        map(eachcol(ψk), eachcol(δHψk)) do ψnk, δHψnk
-            real(dot(ψnk, δHψnk))  # δε_{nk} = <ψnk | δH | ψnk>
-        end
-    end
+    δHtotψ = multiply_ψ_by_blochwave(basis, ψ, δVind, q) .+ δHextψ
 
     # Compute final orbital response
     # TODO Here we just use what DFTK did before the inexact Krylov business, namely
     #      a fixed Sternheimer tolerance of tol / 10. There are probably
     #      smarter things one could do here
-    resfinal = apply_χ0_4P(ham, ψ, occupation, εF, eigenvalues, δHψ;
+    resfinal = apply_χ0_4P(ham, ψ, occupation, εF, eigenvalues, δHtotψ;
+                           δtemperature,
                            maxiter=maxiter_sternheimer, tol=tol * factor_final,
                            bandtolalg, occupation_threshold, q, kwargs...)
     callback((; stage=:final, runtime_ns=time_ns() - start_ns,
                 Axinfos=[(; basis, tol=tol*factor_final, resfinal...)]))
+    # Compute total change in eigenvalues
+    δeigenvalues = map(ψ, δHtotψ) do ψk, δHtotψk
+        map(eachcol(ψk), eachcol(δHtotψk)) do ψnk, δHtotψnk
+            real(dot(ψnk, δHtotψnk))  # δε_{nk} = <ψnk | δHtot | ψnk>
+        end
+    end
 
-    (; resfinal.δψ, δρ, δHψ, δVind, δρ0, δeigenvalues, resfinal.δoccupation,
+    (; resfinal.δψ, δρ, δHtotψ, δVind, δρ0, δeigenvalues, resfinal.δoccupation,
        resfinal.δεF, ε_adj, info_gmres)
 end
 
-function solve_ΩplusK_split(scfres::NamedTuple, rhs; kwargs...)
+function solve_ΩplusK_split(scfres::NamedTuple, δHextψ; kwargs...)
     if (scfres.mixing isa KerkerMixing || scfres.mixing isa KerkerDosMixing)
         mixing = scfres.mixing
     else
         mixing = SimpleMixing()
     end
     solve_ΩplusK_split(scfres.ham, scfres.ρ, scfres.ψ, scfres.occupation,
-                       scfres.εF, scfres.eigenvalues, rhs;
+                       scfres.εF, scfres.eigenvalues, δHextψ;
                        scfres.occupation_threshold, mixing,
                        bandtolalg=BandtolBalanced(scfres), kwargs...)
 end
diff --git a/src/scf/newton.jl b/src/scf/newton.jl
@@ -117,7 +117,7 @@ function newton(basis::PlaneWaveBasis{T}, ψ0;
         # compute Newton step and next iteration
         res = compute_projected_gradient(basis, ψ, occupation)
         # solve (Ω+K) δψ = -res so that the Newton step is ψ <- ψ + δψ
-        δψ = solve_ΩplusK(basis, ψ, -res, occupation; tol=tol_cg, callback=identity).δψ
+        δψ = solve_ΩplusK(basis, ψ, res, occupation; tol=tol_cg, callback=identity).δψ
         ψ  = [ortho_qr(ψ[ik] + δψ[ik]) for ik = 1:Nk]
 
         ρout        = compute_density(basis, ψ, occupation)
diff --git a/src/workarounds/forwarddiff_rules.jl b/src/workarounds/forwarddiff_rules.jl
@@ -166,7 +166,7 @@ function construct_value(model::Model{T}) where {T <: Dual}
           newpositions;
           model.model_name,
           model.n_electrons,
-          magnetic_moments=[],  # Symmetries given explicitly
+          magnetic_moments=value_type(T)[],  # Symmetries given explicitly
           terms=model.term_types,
           temperature=ForwardDiff.value(model.temperature),
           model.smearing,
@@ -233,12 +233,12 @@ function self_consistent_field(basis_dual::PlaneWaveBasis{T};
                                       scfres.εF).ham
         ham_dual * scfres.ψ
     end
-
     # Implicit differentiation
     response.verbose && println("Solving response problem")
     δresults = ntuple(ForwardDiff.npartials(T)) do α
         δHextψ = [ForwardDiff.partials.(δHextψk, α) for δHextψk in Hψ_dual]
-        solve_ΩplusK_split(scfres, -δHextψ;
+        δtemperature = ForwardDiff.partials(basis_dual.model.temperature, α)
+        solve_ΩplusK_split(scfres, δHextψ; δtemperature,
                            tol=last(scfres.history_Δρ), response.verbose)
     end
 
diff --git a/test/forwarddiff.jl b/test/forwarddiff.jl
@@ -402,4 +402,33 @@ end
     
     # Check that scfres_dual has the same parameters as scfres
     @test isempty(setdiff(keys(scfres), keys(scfres_dual)))
-end
+end
+
+
+@testitem "ForwardDiff wrt temperature" tags=[:dont_test_mpi, :minimal] begin
+    using DFTK
+    using ForwardDiff
+    using LinearAlgebra
+    using PseudoPotentialData
+
+    a = 10.26  # Silicon lattice constant in Bohr
+    lattice = a / 2 * [[0 1 1.];
+                       [1 0 1.];
+                       [1 1 0.]]
+    Si = ElementPsp(:Si, PseudoFamily("dojo.nc.sr.lda.v0_4_1.standard.upf"))
+    atoms     = [Si, Si]
+    positions = [ones(3)/8, -ones(3)/8]
+
+    function get(T)
+        model = model_DFT(lattice, atoms, positions; functionals=LDA(), temperature=T)
+        basis = PlaneWaveBasis(model; Ecut=10, kgrid=[1, 1, 1])
+        scfres = self_consistent_field(basis, tol=1e-12)
+        scfres.energies.total
+    end
+    T0 = .01
+    derivative_ε = let ε = 1e-5
+        (get(T0+ε) - get(T0-ε)) / 2ε
+    end
+    derivative_fd = ForwardDiff.derivative(get, T0)
+    @test norm(derivative_ε - derivative_fd) < 1e-4
+end
diff --git a/test/hessian.jl b/test/hessian.jl