Skip to content

Commit 0ea1d03

Browse files
committed
Improve GPU performance of update_jacobian by pulling out common subexpressions
Sedimentation and the microphysics tracers loop were affected. Three new scratch variables were introduced.
1 parent 6b17095 commit 0ea1d03

File tree

2 files changed

+40
-24
lines changed

2 files changed

+40
-24
lines changed

src/cache/temporary_quantities.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,16 @@ function temporary_quantities(Y, atmos)
9292
ClimaCore.Geometry.WVector{FT},
9393
},
9494
),
95+
ᶠsed_tracer_advection = similar(
96+
Y.f,
97+
ClimaCore.MatrixFields.BandMatrixRow{
98+
ClimaCore.Utilities.PlusHalf{Int64}(0),
99+
1,
100+
ClimaCore.Geometry.WVector{FT},
101+
},
102+
),
103+
ᶠtracer_advection = similar(Y.f, BidiagonalMatrixRow{Adjoint{FT, C3{FT}}}),
104+
ᶠtracer_advection_upwind = similar(Y.f, TridiagonalMatrixRow{FT}),
95105
ᶠdiagonal_matrix_ct3xct3 = similar(
96106
Y.f,
97107
DiagonalMatrixRow{
@@ -112,6 +122,10 @@ function temporary_quantities(Y, atmos)
112122
Y.c,
113123
BidiagonalMatrixRow{Adjoint{FT, C3{FT}}},
114124
),
125+
ᶜadvection_matrix_2 = similar(
126+
Y.c,
127+
BidiagonalMatrixRow{Adjoint{FT, C3{FT}}},
128+
),
115129
ᶜdiffusion_h_matrix = similar(Y.c, TridiagonalMatrixRow{FT}),
116130
ᶜdiffusion_u_matrix = similar(Y.c, TridiagonalMatrixRow{FT}),
117131
ᶜtridiagonal_matrix_scalar = similar(Y.c, TridiagonalMatrixRow{FT}),

src/prognostic_equations/implicit/manual_sparse_jacobian.jl

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,9 @@ function update_jacobian!(alg::ManualSparseJacobian, cache, Y, p, dtγ, t)
399399
ᶜdiffusion_h_matrix,
400400
ᶜdiffusion_u_matrix,
401401
ᶜtridiagonal_matrix_scalar,
402+
ᶠsed_tracer_advection,
403+
ᶠtracer_advection,
404+
ᶠtracer_advection_upwind,
402405
ᶠbidiagonal_matrix_ct3,
403406
ᶠbidiagonal_matrix_ct3_2,
404407
ᶠtridiagonal_matrix_c3,
@@ -1006,11 +1009,9 @@ function update_jacobian!(alg::ManualSparseJacobian, cache, Y, p, dtγ, t)
10061009
p.atmos.microphysics_model isa Microphysics2Moment
10071010
)
10081011

1012+
10091013
ᶜa = (@. lazy(draft_area(Y.c.sgsʲs.:(1).ρa, ᶜρʲs.:(1))))
1010-
ᶜ∂a∂z =
1011-
@. lazy(
1012-
ᶜprecipdivᵥ(ᶠinterp(ᶜJ) / ᶠJ * ᶠright_bias(Geometry.WVector(ᶜa))),
1013-
)
1014+
ᶜ∂a∂z = @. ᶜprecipdivᵥ(ᶠinterp(ᶜJ) / ᶠJ * ᶠright_bias(Geometry.WVector(ᶜa)))
10141015
ᶜinv_ρ̂ = (@. lazy(
10151016
specific(
10161017
FT(1),
@@ -1058,16 +1059,17 @@ function update_jacobian!(alg::ManualSparseJacobian, cache, Y, p, dtγ, t)
10581059
) DiagonalMatrixRow(g³³(ᶠgⁱʲ))
10591060

10601061
# sedimentation
1062+
# (pull out common subexpression for performance)
1063+
ᶠsed_tracer_advection =
1064+
@. DiagonalMatrixRow(ᶠinterp(ᶜρʲs.:(1) * ᶜJ) / ᶠJ)
1065+
ᶠright_bias_matrix()
1066+
DiagonalMatrixRow(-Geometry.WVector(ᶜwʲ))
10611067
@. ᶜtridiagonal_matrix_scalar =
10621068
dtγ * ifelse(ᶜ∂a∂z < 0,
1063-
-(ᶜprecipdivᵥ_matrix())
1064-
DiagonalMatrixRow(ᶠinterp(ᶜρʲs.:(1) * ᶜJ) / ᶠJ)
1065-
ᶠright_bias_matrix()
1066-
DiagonalMatrixRow(-Geometry.WVector(ᶜwʲ) * ᶜa),
1069+
-(ᶜprecipdivᵥ_matrix()) ᶠsed_tracer_advection *
1070+
DiagonalMatrixRow(ᶜa),
10671071
-DiagonalMatrixRow(ᶜa) ᶜprecipdivᵥ_matrix()
1068-
DiagonalMatrixRow(ᶠinterp(ᶜρʲs.:(1) * ᶜJ) / ᶠJ)
1069-
ᶠright_bias_matrix()
1070-
DiagonalMatrixRow(-Geometry.WVector(ᶜwʲ)),
1072+
ᶠsed_tracer_advection,
10711073
)
10721074

10731075
@. ∂ᶜχʲ_err_∂ᶜχʲ +=
@@ -1252,7 +1254,6 @@ function update_jacobian!(alg::ManualSparseJacobian, cache, Y, p, dtγ, t)
12521254
draft_area(Y.c.sgsʲs.:(1).ρa, ᶜρʲs.:(1)),
12531255
) / ᶠJ * (g³³(ᶠgⁱʲ)),
12541256
)
1255-
12561257
∂ᶜρe_tot_err_∂ᶠu₃ʲ =
12571258
matrix[@name(c.ρe_tot), @name(f.sgsʲs.:(1).u₃)]
12581259
@. ∂ᶜρe_tot_err_∂ᶠu₃ʲ =
@@ -1320,6 +1321,15 @@ function update_jacobian!(alg::ManualSparseJacobian, cache, Y, p, dtγ, t)
13201321
(@name(c.ρn_liq), @name(c.sgsʲs.:(1).n_liq)),
13211322
(@name(c.ρn_rai), @name(c.sgsʲs.:(1).n_rai)),
13221323
)
1324+
1325+
# hoist the common subexpressions that are independent of the
1326+
# tracer out of the tracer loop for performance
1327+
ᶠtracer_advection = @. -(ᶜadvdivᵥ_matrix())
1328+
DiagonalMatrixRow(ᶠinterp(ᶜρʲs.:(1) * ᶜJ) / ᶠJ)
1329+
ᶠtracer_advection_upwind =
1330+
@. ᶠtracer_advection ᶠset_tracer_upwind_matrix_bcs(
1331+
ᶠtracer_upwind_matrix(ᶠu³ʲs.:(1)),
1332+
)
13231333
MatrixFields.unrolled_foreach(
13241334
microphysics_tracers,
13251335
) do (ρχ_name, χʲ_name)
@@ -1330,30 +1340,22 @@ function update_jacobian!(alg::ManualSparseJacobian, cache, Y, p, dtγ, t)
13301340
matrix[ρχ_name, χʲ_name]
13311341
@. ∂ᶜρχ_err_∂ᶜχʲ =
13321342
dtγ *
1333-
-(ᶜadvdivᵥ_matrix())
1334-
DiagonalMatrixRow(ᶠinterp(ᶜρʲs.:(1) * ᶜJ) / ᶠJ)
1335-
ᶠset_tracer_upwind_matrix_bcs(
1336-
ᶠtracer_upwind_matrix(ᶠu³ʲs.:(1)),
1337-
)
1343+
ᶠtracer_advection_upwind
13381344
DiagonalMatrixRow(draft_area(Y.c.sgsʲs.:(1).ρa, ᶜρʲs.:(1)))
13391345

13401346
∂ᶜρχ_err_∂ᶜρa =
13411347
matrix[ρχ_name, @name(c.sgsʲs.:(1).ρa)]
13421348
@. ∂ᶜρχ_err_∂ᶜρa =
13431349
dtγ *
1344-
-(ᶜadvdivᵥ_matrix())
1345-
DiagonalMatrixRow(ᶠinterp(ᶜρʲs.:(1) * ᶜJ) / ᶠJ)
1346-
ᶠset_tracer_upwind_matrix_bcs(
1347-
ᶠtracer_upwind_matrix(ᶠu³ʲs.:(1)),
1348-
)
1350+
ᶠtracer_advection_upwind
13491351
DiagonalMatrixRow(ᶜχʲ / ᶜρʲs.:(1))
13501352

13511353
∂ᶜρχ_err_∂ᶠu₃ʲ =
13521354
matrix[ρχ_name, @name(f.sgsʲs.:(1).u₃)]
13531355
@. ∂ᶜρχ_err_∂ᶠu₃ʲ =
13541356
dtγ * (
1355-
-(ᶜadvdivᵥ_matrix()) DiagonalMatrixRow(
1356-
ᶠinterp(ᶜρʲs.:(1) * ᶜJ) / ᶠJ *
1357+
ᶠtracer_advection
1358+
DiagonalMatrixRow(
13571359
ᶠset_tracer_upwind_bcs(
13581360
ᶠtracer_upwind(CT3(sign(ᶠu³ʲ_data)),
13591361
draft_area(Y.c.sgsʲs.:(1).ρa, ᶜρʲs.:(1)) * ᶜχʲ,

0 commit comments

Comments
 (0)