Skip to content

Commit 0ea2afe

Browse files
abussy authored and
niklasschmitz committed
GPU| reduce bubbles and kernel overhead (#1132)
1 parent 67b8ba7 commit 0ea2afe

File tree

2 files changed

+4
-6
lines changed

2 files changed

+4
-6
lines changed

src/densities.jl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ using an optional `occupation_threshold`. By default all occupation numbers are
3838
.* (basis.fft_grid.ifft_normalization)^2
3939
.* abs2.(storage.ψnk_real))
4040

41-
synchronize_device(basis.architecture)
4241
end
4342
ρ = sum(getfield.(storages, :ρ))
4443

@@ -93,7 +92,6 @@ end
9392
.* storage.δψnk_real
9493
.+ δoccupation[ik][n] .* basis.kweights[ik] .* abs2.(storage.ψnk_real))
9594

96-
synchronize_device(basis.architecture)
9795
end
9896
δρ = sum(getfield.(storages, :δρ))
9997

src/terms/Hamiltonian.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,11 +148,10 @@ end
148148
to = TimerOutput() # Thread-local timer output
149149
ψ_real = storage.ψ_reals
150150

151-
@timeit to "local+kinetic" begin
151+
@timeit to "local" begin
152152
ifft!(ψ_real, H.basis, H.kpoint, ψ[:, iband]; normalize=false)
153153
ψ_real .*= potential
154154
fft!(Hψ[:, iband], H.basis, H.kpoint, ψ_real; normalize=false) # overwrites ψ_real
155-
Hψ[:, iband] .+= H.fourier_op.multiplier .* ψ[:, iband]
156155
end
157156

158157
if have_divAgrad
@@ -167,10 +166,11 @@ end
167166
if Threads.threadid() == 1
168167
merge!(DFTK.timer, to; tree_point=[t.name for t in DFTK.timer.timer_stack])
169168
end
170-
171-
synchronize_device(H.basis.architecture)
172169
end
173170

171+
# Kinetic term
172+
Hψ .+= H.fourier_op.multiplier .* ψ
173+
174174
# Apply the nonlocal operator.
175175
if !isnothing(H.nonlocal_op)
176176
@timing "nonlocal" begin

0 commit comments

Comments
 (0)