Skip to content

Commit c6859e6

Browse files
authored
Merge pull request #37 from magpowell/liq_or_ice_only_cloud_optics
add liq or ice cloud optics flags
2 parents eb01e87 + dd1e611 commit c6859e6

File tree

4 files changed

+325
-131
lines changed

4 files changed

+325
-131
lines changed

include_test/Radiation_solver_rt.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ class Radiation_solver_shortwave
9999
#ifdef __CUDACC__
100100
void solve_gpu(
101101
const bool switch_fluxes,
102+
const bool switch_disable_2s,
102103
const bool switch_raytracing,
103104
const bool switch_independent_column,
104105
const bool switch_cloud_optics,

src_cuda_rt/Cloud_optics_rt.cu

Lines changed: 183 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,23 @@ namespace
8686
}
8787
}
8888

89+
// Single-phase store for the tau-only overload: for every (column, layer)
// cell, writes tau = l_or_i_tau - l_or_i_taussa.
//
// Launch layout: 2D grid/block; x indexes columns, y indexes layers.
// Threads that fall outside ncol x nlay exit without touching memory.
//
// ncol, nlay     : extents of the column and layer dimensions.
// tmin           : accepted for signature parity; not read by this kernel.
// tau            : output, written at index icol + ilay*ncol.
// l_or_i_tau     : input for the single active phase (liquid or ice).
// l_or_i_taussa  : input for the same phase, subtracted from l_or_i_tau.
__global__
void combine_and_store_kernel_single_phase(const int ncol, const int nlay, const Float tmin,
                  Float* __restrict__ tau,
                  const Float* __restrict__ l_or_i_tau, const Float* __restrict__ l_or_i_taussa)
{
    const int col = blockIdx.x*blockDim.x + threadIdx.x;
    const int lay = blockIdx.y*blockDim.y + threadIdx.y;

    // Guard clause: the grid is rounded up, so trailing threads may be out of range.
    if ( (col >= ncol) || (lay >= nlay) )
        return;

    // Flattened index with the column dimension varying fastest.
    const int cell = col + lay*ncol;

    tau[cell] = l_or_i_tau[cell] - l_or_i_taussa[cell];
}
105+
89106
__global__
90107
void combine_and_store_kernel(const int ncol, const int nlay, const Float tmin,
91108
Float* __restrict__ tau, Float* __restrict__ ssa, Float* __restrict__ g,
@@ -108,6 +125,28 @@ namespace
108125
}
109126
}
110127

128+
// Single-phase store for the tau/ssa/g overload: for every (column, layer)
// cell, converts the three inputs of one phase (liquid or ice) into
//   tau = l_or_i_tau
//   ssa = l_or_i_taussa  / max(l_or_i_tau,    tmin)
//   g   = l_or_i_taussag / max(l_or_i_taussa, tmin)
//
// Launch layout: 2D grid/block; x indexes columns, y indexes layers.
// Threads that fall outside ncol x nlay exit without touching memory.
//
// ncol, nlay      : extents of the column and layer dimensions.
// tmin            : lower bound applied to both denominators.
// tau, ssa, g     : outputs, written at index icol + ilay*ncol.
// l_or_i_tau      : input for the single active phase.
// l_or_i_taussa   : input for the same phase (numerator of ssa, denominator of g).
// l_or_i_taussag  : input for the same phase (numerator of g).
__global__
void combine_and_store_kernel_single_phase(const int ncol, const int nlay, const Float tmin,
                  Float* __restrict__ tau, Float* __restrict__ ssa, Float* __restrict__ g,
                  const Float* __restrict__ l_or_i_tau, const Float* __restrict__ l_or_i_taussa, const Float* __restrict__ l_or_i_taussag
                  )
{
    const int col = blockIdx.x*blockDim.x + threadIdx.x;
    const int lay = blockIdx.y*blockDim.y + threadIdx.y;

    // Guard clause: the grid is rounded up, so trailing threads may be out of range.
    if ( (col >= ncol) || (lay >= nlay) )
        return;

    // Flattened index with the column dimension varying fastest.
    const int cell = col + lay*ncol;

    // Read all inputs before any store.
    const Float phase_tau     = l_or_i_tau[cell];
    const Float phase_taussa  = l_or_i_taussa[cell];
    const Float phase_taussag = l_or_i_taussag[cell];

    tau[cell] = phase_tau;
    // tmin keeps the denominators away from zero.
    ssa[cell] = phase_taussa / max(phase_tau, tmin);
    g[cell]   = phase_taussag / max(phase_taussa, tmin);
}
149+
111150
__global__
112151
void set_mask(const int ncol, const int nlay, const Float min_value,
113152
Bool* __restrict__ mask, const Float* __restrict__ values)
@@ -182,11 +221,19 @@ void Cloud_optics_rt::cloud_optics(
182221
const Array_gpu<Float,2>& reliq, const Array_gpu<Float,2>& reice,
183222
Optical_props_2str_rt& optical_props)
184223
{
185-
const int ncol = clwp.dim(1);
186-
const int nlay = clwp.dim(2);
187-
188-
Optical_props_2str_rt clouds_liq(ncol, nlay, optical_props);
189-
Optical_props_2str_rt clouds_ice(ncol, nlay, optical_props);
224+
int ncol = -1;
225+
int nlay = -1;
226+
if (clwp.ptr() != nullptr)
227+
{
228+
ncol = clwp.dim(1);
229+
nlay = clwp.dim(2);
230+
Optical_props_2str_rt clouds_liq(ncol, nlay, optical_props);
231+
} else if (ciwp.ptr() != nullptr)
232+
{
233+
ncol = ciwp.dim(1);
234+
nlay = ciwp.dim(2);
235+
Optical_props_2str_rt clouds_ice(ncol, nlay, optical_props);
236+
}
190237

191238
// Set the mask.
192239
constexpr Float mask_min_value = Float(0.);
@@ -199,53 +246,81 @@ void Cloud_optics_rt::cloud_optics(
199246
dim3 grid_m_gpu(grid_col_m, grid_lay_m);
200247
dim3 block_m_gpu(block_col_m, block_lay_m);
201248

202-
Array_gpu<Bool,2> liqmsk({ncol, nlay});
203-
set_mask<<<grid_m_gpu, block_m_gpu>>>(
204-
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());
205-
206-
Array_gpu<Bool,2> icemsk({ncol, nlay});
207-
set_mask<<<grid_m_gpu, block_m_gpu>>>(
208-
ncol, nlay, mask_min_value, icemsk.ptr(), ciwp.ptr());
209249

210250
// Temporary arrays for storage.
211-
Array_gpu<Float,2> ltau ({ncol, nlay});
212-
Array_gpu<Float,2> ltaussa ({ncol, nlay});
213-
Array_gpu<Float,2> ltaussag({ncol, nlay});
214-
215-
Array_gpu<Float,2> itau ({ncol, nlay});
216-
Array_gpu<Float,2> itaussa ({ncol, nlay});
217-
Array_gpu<Float,2> itaussag({ncol, nlay});
218-
251+
Array_gpu<Bool,2> liqmsk({0, 0});
252+
Array_gpu<Float,2> ltau ({0, 0});
253+
Array_gpu<Float,2> ltaussa ({0, 0});
254+
Array_gpu<Float,2> ltaussag({0, 0});
255+
Array_gpu<Bool,2> icemsk({0, 0});
256+
Array_gpu<Float,2> itau ({0, 0});
257+
Array_gpu<Float,2> itaussa ({0, 0});
258+
Array_gpu<Float,2> itaussag({0, 0});
259+
if (clwp.ptr() != nullptr){
260+
liqmsk.set_dims({ncol, nlay});
261+
ltau.set_dims({ncol, nlay});
262+
ltaussa.set_dims({ncol, nlay});
263+
ltaussag.set_dims({ncol, nlay});
264+
265+
set_mask<<<grid_m_gpu, block_m_gpu>>>(
266+
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());
267+
}
268+
if (ciwp.ptr() != nullptr){
269+
icemsk.set_dims({ncol, nlay});
270+
itau.set_dims({ncol, nlay});
271+
itaussa.set_dims({ncol, nlay});
272+
itaussag.set_dims({ncol, nlay});
273+
274+
set_mask<<<grid_m_gpu, block_m_gpu>>>(
275+
ncol, nlay, mask_min_value, icemsk.ptr(), ciwp.ptr());
276+
}
219277
const int block_col = 64;
220278
const int block_lay = 1;
221-
222279
const int grid_col = ncol/block_col + (ncol%block_col > 0);
223280
const int grid_lay = nlay/block_lay + (nlay%block_lay > 0);
224281

225282
dim3 grid_gpu(grid_col, grid_lay);
226283
dim3 block_gpu(block_col, block_lay);
227284

228285
// Liquid water
229-
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
230-
ncol, nlay, ibnd-1, liqmsk.ptr(), clwp.ptr(), reliq.ptr(),
231-
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
232-
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
233-
this->lut_asyliq_gpu.ptr(), ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
286+
if (clwp.ptr() != nullptr){
287+
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
288+
ncol, nlay, ibnd-1, liqmsk.ptr(), clwp.ptr(), reliq.ptr(),
289+
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
290+
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
291+
this->lut_asyliq_gpu.ptr(), ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
292+
}
234293

235294
// Ice.
236-
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
237-
ncol, nlay, ibnd-1, icemsk.ptr(), ciwp.ptr(), reice.ptr(),
238-
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
239-
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
240-
this->lut_asyice_gpu.ptr(), itau.ptr(), itaussa.ptr(), itaussag.ptr());
241-
295+
if (ciwp.ptr() != nullptr){
296+
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
297+
ncol, nlay, ibnd-1, icemsk.ptr(), ciwp.ptr(), reice.ptr(),
298+
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
299+
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
300+
this->lut_asyice_gpu.ptr(), itau.ptr(), itaussa.ptr(), itaussag.ptr());
301+
}
242302
constexpr Float eps = std::numeric_limits<Float>::epsilon();
243-
244-
combine_and_store_kernel<<<grid_gpu, block_gpu>>>(
303+
if ((ciwp.ptr() != nullptr) && (clwp.ptr() != nullptr))
304+
{
305+
combine_and_store_kernel<<<grid_gpu, block_gpu>>>(
245306
ncol, nlay, eps,
246307
optical_props.get_tau().ptr(), optical_props.get_ssa().ptr(), optical_props.get_g().ptr(),
247308
ltau.ptr(), ltaussa.ptr(), ltaussag.ptr(),
248309
itau.ptr(), itaussa.ptr(), itaussag.ptr());
310+
} else if(ciwp.ptr() == nullptr)
311+
{
312+
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
313+
ncol, nlay, eps,
314+
optical_props.get_tau().ptr(), optical_props.get_ssa().ptr(), optical_props.get_g().ptr(),
315+
ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
316+
} else if (clwp.ptr() == nullptr)
317+
{
318+
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
319+
ncol, nlay, eps,
320+
optical_props.get_tau().ptr(), optical_props.get_ssa().ptr(), optical_props.get_g().ptr(),
321+
itau.ptr(), itaussa.ptr(), itaussag.ptr());
322+
}
323+
249324
}
250325

251326
// 1scl variant of cloud optics.
@@ -255,12 +330,20 @@ void Cloud_optics_rt::cloud_optics(
255330
const Array_gpu<Float,2>& reliq, const Array_gpu<Float,2>& reice,
256331
Optical_props_1scl_rt& optical_props)
257332
{
258-
const int ncol = clwp.dim(1);
259-
const int nlay = clwp.dim(2);
260-
261-
Optical_props_1scl_rt clouds_liq(ncol, nlay, optical_props);
262-
Optical_props_1scl_rt clouds_ice(ncol, nlay, optical_props);
263-
333+
int ncol = -1;
334+
int nlay = -1;
335+
if (clwp.ptr() != nullptr)
336+
{
337+
ncol = clwp.dim(1);
338+
nlay = clwp.dim(2);
339+
Optical_props_1scl_rt clouds_liq(ncol, nlay, optical_props);
340+
} else if (ciwp.ptr() != nullptr)
341+
{
342+
ncol = ciwp.dim(1);
343+
nlay = ciwp.dim(2);
344+
Optical_props_1scl_rt clouds_ice(ncol, nlay, optical_props);
345+
}
346+
264347
// Set the mask.
265348
constexpr Float mask_min_value = Float(0.);
266349
const int block_col_m = 16;
@@ -272,22 +355,36 @@ void Cloud_optics_rt::cloud_optics(
272355
dim3 grid_m_gpu(grid_col_m, grid_lay_m);
273356
dim3 block_m_gpu(block_col_m, block_lay_m);
274357

275-
Array_gpu<Bool,2> liqmsk({ncol, nlay});
276-
set_mask<<<grid_m_gpu, block_m_gpu>>>(
277-
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());
278-
279-
Array_gpu<Bool,2> icemsk({ncol, nlay});
280-
set_mask<<<grid_m_gpu, block_m_gpu>>>(
281-
ncol, nlay, mask_min_value, icemsk.ptr(), ciwp.ptr());
282-
283358
// Temporary arrays for storage.
284-
Array_gpu<Float,2> ltau ({ncol, nlay});
285-
Array_gpu<Float,2> ltaussa ({ncol, nlay});
286-
Array_gpu<Float,2> ltaussag({ncol, nlay});
359+
Array_gpu<Bool,2> liqmsk({0, 0});
360+
Array_gpu<Float,2> ltau ({0, 0});
361+
Array_gpu<Float,2> ltaussa ({0, 0});
362+
Array_gpu<Float,2> ltaussag({0, 0});
363+
Array_gpu<Bool,2> icemsk({0, 0});
364+
Array_gpu<Float,2> itau ({0, 0});
365+
Array_gpu<Float,2> itaussa ({0, 0});
366+
Array_gpu<Float,2> itaussag({0, 0});
367+
368+
if (clwp.ptr() != nullptr)
369+
{
370+
liqmsk.set_dims({ncol, nlay});
371+
ltau.set_dims({ncol, nlay});
372+
ltaussa.set_dims({ncol, nlay});
373+
ltaussag.set_dims({ncol, nlay});
374+
375+
set_mask<<<grid_m_gpu, block_m_gpu>>>(
376+
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());
377+
}
378+
if (ciwp.ptr() != nullptr)
379+
{
380+
icemsk.set_dims({ncol, nlay});
381+
itau.set_dims({ncol, nlay});
382+
itaussa.set_dims({ncol, nlay});
383+
itaussag.set_dims({ncol, nlay});
287384

288-
Array_gpu<Float,2> itau ({ncol, nlay});
289-
Array_gpu<Float,2> itaussa ({ncol, nlay});
290-
Array_gpu<Float,2> itaussag({ncol, nlay});
385+
set_mask<<<grid_m_gpu, block_m_gpu>>>(
386+
ncol, nlay, mask_min_value, icemsk.ptr(), ciwp.ptr());
387+
}
291388

292389
const int block_col = 64;
293390
const int block_lay = 1;
@@ -299,25 +396,45 @@ void Cloud_optics_rt::cloud_optics(
299396
dim3 block_gpu(block_col, block_lay);
300397

301398
// Liquid water
302-
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
303-
ncol, nlay, ibnd-1, liqmsk.ptr(), clwp.ptr(), reliq.ptr(),
304-
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
305-
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
306-
this->lut_asyliq_gpu.ptr(), ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
399+
if (clwp.ptr() != nullptr){
400+
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
401+
ncol, nlay, ibnd-1, liqmsk.ptr(), clwp.ptr(), reliq.ptr(),
402+
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
403+
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
404+
this->lut_asyliq_gpu.ptr(), ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
405+
}
307406

308407
// Ice.
309-
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
310-
ncol, nlay, ibnd-1, icemsk.ptr(), ciwp.ptr(), reice.ptr(),
311-
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
312-
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
313-
this->lut_asyice_gpu.ptr(), itau.ptr(), itaussa.ptr(), itaussag.ptr());
408+
if (ciwp.ptr() != nullptr){
409+
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
410+
ncol, nlay, ibnd-1, icemsk.ptr(), ciwp.ptr(), reice.ptr(),
411+
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
412+
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
413+
this->lut_asyice_gpu.ptr(), itau.ptr(), itaussa.ptr(), itaussag.ptr());
414+
}
314415

315416
constexpr Float eps = std::numeric_limits<Float>::epsilon();
316-
317-
combine_and_store_kernel<<<grid_gpu, block_gpu>>>(
417+
if ((ciwp.ptr() != nullptr) && (clwp.ptr() != nullptr))
418+
{
419+
combine_and_store_kernel<<<grid_gpu, block_gpu>>>(
318420
ncol, nlay, eps,
319421
optical_props.get_tau().ptr(),
320422
ltau.ptr(), ltaussa.ptr(),
321423
itau.ptr(), itaussa.ptr());
424+
} else if(ciwp.ptr() == nullptr)
425+
{
426+
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
427+
ncol, nlay, eps,
428+
optical_props.get_tau().ptr(),
429+
ltau.ptr(), ltaussa.ptr());
430+
} else if(clwp.ptr() == nullptr)
431+
{
432+
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
433+
ncol, nlay, eps,
434+
optical_props.get_tau().ptr(),
435+
itau.ptr(), itaussa.ptr());
436+
437+
}
438+
322439
}
323440

src_test/Radiation_solver_rt.cu

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,7 @@ void Radiation_solver_shortwave::load_mie_tables(
567567

568568
void Radiation_solver_shortwave::solve_gpu(
569569
const bool switch_fluxes,
570+
const bool switch_disable_2s,
570571
const bool switch_raytracing,
571572
const bool switch_independent_column,
572573
const bool switch_cloud_optics,
@@ -636,10 +637,13 @@ void Radiation_solver_shortwave::solve_gpu(
636637

637638
if (switch_fluxes)
638639
{
639-
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_up.ptr());
640-
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_dn.ptr());
641-
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_dn_dir.ptr());
642-
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_net.ptr());
640+
if (!switch_disable_2s)
641+
{
642+
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_up.ptr());
643+
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_dn.ptr());
644+
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_dn_dir.ptr());
645+
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_net.ptr());
646+
}
643647
if (switch_raytracing)
644648
{
645649
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.y, grid_cells.x, rt_flux_tod_up.ptr());
@@ -836,10 +840,12 @@ void Radiation_solver_shortwave::solve_gpu(
836840
}
837841

838842
(*fluxes).net_flux();
839-
840-
Gpt_combine_kernels_cuda_rt::add_from_gpoint(
841-
n_col, n_lev, sw_flux_up.ptr(), sw_flux_dn.ptr(), sw_flux_dn_dir.ptr(), sw_flux_net.ptr(),
842-
(*fluxes).get_flux_up().ptr(), (*fluxes).get_flux_dn().ptr(), (*fluxes).get_flux_dn_dir().ptr(), (*fluxes).get_flux_net().ptr());
843+
if (!switch_disable_2s)
844+
{
845+
Gpt_combine_kernels_cuda_rt::add_from_gpoint(
846+
n_col, n_lev, sw_flux_up.ptr(), sw_flux_dn.ptr(), sw_flux_dn_dir.ptr(), sw_flux_net.ptr(),
847+
(*fluxes).get_flux_up().ptr(), (*fluxes).get_flux_dn().ptr(), (*fluxes).get_flux_dn_dir().ptr(), (*fluxes).get_flux_net().ptr());
848+
}
843849

844850
if (switch_raytracing)
845851
{

0 commit comments

Comments
 (0)