Skip to content

Commit d4aa235

Browse files
committed
rename single-phase kernels, remove l_or_i prefix in single-phase kernels and write in output tau directly as we do not need to sum liquid and ice, saving one temporary array, read ncol/nlay from optical props input
1 parent 30cd2e5 commit d4aa235

File tree

1 file changed

+138
-81
lines changed

1 file changed

+138
-81
lines changed

src_cuda_rt/Cloud_optics_rt.cu

Lines changed: 138 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -87,19 +87,17 @@ namespace
8787
}
8888

8989
__global__
90-
void combine_and_store_kernel_single_phase(const int ncol, const int nlay, const Float tmin,
91-
Float* __restrict__ tau,
92-
const Float* __restrict__ l_or_i_tau, const Float* __restrict__ l_or_i_taussa)
90+
void store_single_phase_kernel(const int ncol, const int nlay, const Float tmin,
91+
Float* __restrict__ tau, const Float* taussa)
9392
{
9493
const int icol = blockIdx.x*blockDim.x + threadIdx.x;
9594
const int ilay = blockIdx.y*blockDim.y + threadIdx.y;
9695

9796
if ( (icol < ncol) && (ilay < nlay) )
9897
{
9998
const int idx = icol + ilay*ncol;
100-
const Float tau_t = (l_or_i_tau[idx] - l_or_i_taussa[idx]);
10199

102-
tau[idx] = tau_t;
100+
tau[idx] -= taussa[idx];
103101
}
104102
}
105103

@@ -126,9 +124,9 @@ namespace
126124
}
127125

128126
__global__
129-
void combine_and_store_kernel_single_phase(const int ncol, const int nlay, const Float tmin,
127+
void store_single_phase_kernel(const int ncol, const int nlay, const Float tmin,
130128
Float* __restrict__ tau, Float* __restrict__ ssa, Float* __restrict__ g,
131-
const Float* __restrict__ l_or_i_tau, const Float* __restrict__ l_or_i_taussa, const Float* __restrict__ l_or_i_taussag
129+
const Float* __restrict__ taussa, const Float* __restrict__ taussag
132130
)
133131
{
134132
const int icol = blockIdx.x*blockDim.x + threadIdx.x;
@@ -137,13 +135,9 @@ namespace
137135
if ( (icol < ncol) && (ilay < nlay) )
138136
{
139137
const int idx = icol + ilay*ncol;
140-
const Float tau_t = l_or_i_tau[idx];
141-
const Float taussa = l_or_i_taussa[idx];
142-
const Float taussag = l_or_i_taussag[idx];
143138

144-
tau[idx] = tau_t;
145-
ssa[idx] = taussa / max(tau_t, tmin);
146-
g[idx] = taussag/ max(taussa, tmin);
139+
ssa[idx] = taussa[idx] / max(tau[idx], tmin);
140+
g[idx] = taussag[idx] / max(taussa[idx], tmin);
147141
}
148142
}
149143

@@ -221,19 +215,8 @@ void Cloud_optics_rt::cloud_optics(
221215
const Array_gpu<Float,2>& reliq, const Array_gpu<Float,2>& reice,
222216
Optical_props_2str_rt& optical_props)
223217
{
224-
int ncol = -1;
225-
int nlay = -1;
226-
if (clwp.ptr() != nullptr)
227-
{
228-
ncol = clwp.dim(1);
229-
nlay = clwp.dim(2);
230-
Optical_props_2str_rt clouds_liq(ncol, nlay, optical_props);
231-
} else if (ciwp.ptr() != nullptr)
232-
{
233-
ncol = ciwp.dim(1);
234-
nlay = ciwp.dim(2);
235-
Optical_props_2str_rt clouds_ice(ncol, nlay, optical_props);
236-
}
218+
int ncol = optical_props.get_tau().dim(1);
219+
int nlay = optical_props.get_tau().dim(2);
237220

238221
// Set the mask.
239222
constexpr Float mask_min_value = Float(0.);
@@ -247,33 +230,60 @@ void Cloud_optics_rt::cloud_optics(
247230
dim3 block_m_gpu(block_col_m, block_lay_m);
248231

249232

250-
// Temporary arrays for storage.
233+
// Temporary arrays for storage, liquid and ice seperately if both are present
251234
Array_gpu<Bool,2> liqmsk({0, 0});
252235
Array_gpu<Float,2> ltau ({0, 0});
253236
Array_gpu<Float,2> ltaussa ({0, 0});
254237
Array_gpu<Float,2> ltaussag({0, 0});
238+
255239
Array_gpu<Bool,2> icemsk({0, 0});
256240
Array_gpu<Float,2> itau ({0, 0});
257241
Array_gpu<Float,2> itaussa ({0, 0});
258242
Array_gpu<Float,2> itaussag({0, 0});
259-
if (clwp.ptr() != nullptr){
243+
244+
// Temporary arrays for storage, only on set needed if either liquid or ice is present
245+
Array_gpu<Bool,2> msk({0, 0});
246+
Array_gpu<Float,2> taussa ({0, 0});
247+
Array_gpu<Float,2> taussag({0, 0});
248+
249+
if ((clwp.ptr() != nullptr) && (ciwp.ptr() != nullptr))
250+
{
260251
liqmsk.set_dims({ncol, nlay});
261252
ltau.set_dims({ncol, nlay});
262253
ltaussa.set_dims({ncol, nlay});
263254
ltaussag.set_dims({ncol, nlay});
264-
265-
set_mask<<<grid_m_gpu, block_m_gpu>>>(
266-
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());
267-
}
268-
if (ciwp.ptr() != nullptr){
255+
269256
icemsk.set_dims({ncol, nlay});
270257
itau.set_dims({ncol, nlay});
271258
itaussa.set_dims({ncol, nlay});
272259
itaussag.set_dims({ncol, nlay});
273260

261+
set_mask<<<grid_m_gpu, block_m_gpu>>>(
262+
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());
263+
274264
set_mask<<<grid_m_gpu, block_m_gpu>>>(
275265
ncol, nlay, mask_min_value, icemsk.ptr(), ciwp.ptr());
276266
}
267+
else if (clwp.ptr() != nullptr)
268+
{
269+
msk.set_dims({ncol, nlay});
270+
taussa.set_dims({ncol, nlay});
271+
taussag.set_dims({ncol, nlay});
272+
273+
set_mask<<<grid_m_gpu, block_m_gpu>>>(
274+
ncol, nlay, mask_min_value, msk.ptr(), clwp.ptr());
275+
}
276+
else if (ciwp.ptr() != nullptr)
277+
{
278+
msk.set_dims({ncol, nlay});
279+
taussa.set_dims({ncol, nlay});
280+
taussag.set_dims({ncol, nlay});
281+
282+
set_mask<<<grid_m_gpu, block_m_gpu>>>(
283+
ncol, nlay, mask_min_value, msk.ptr(), ciwp.ptr());
284+
}
285+
286+
277287
const int block_col = 64;
278288
const int block_lay = 1;
279289
const int grid_col = ncol/block_col + (ncol%block_col > 0);
@@ -282,23 +292,41 @@ void Cloud_optics_rt::cloud_optics(
282292
dim3 grid_gpu(grid_col, grid_lay);
283293
dim3 block_gpu(block_col, block_lay);
284294

285-
// Liquid water
286-
if (clwp.ptr() != nullptr){
295+
// Liquid water and ice
296+
if ((clwp.ptr() != nullptr) && (ciwp.ptr() != nullptr))
297+
{
287298
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
288299
ncol, nlay, ibnd-1, liqmsk.ptr(), clwp.ptr(), reliq.ptr(),
289300
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
290301
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
291302
this->lut_asyliq_gpu.ptr(), ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
292-
}
293-
294-
// Ice.
295-
if (ciwp.ptr() != nullptr){
303+
296304
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
297305
ncol, nlay, ibnd-1, icemsk.ptr(), ciwp.ptr(), reice.ptr(),
298306
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
299307
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
300308
this->lut_asyice_gpu.ptr(), itau.ptr(), itaussa.ptr(), itaussag.ptr());
301309
}
310+
// liquid only
311+
else if (clwp.ptr() != nullptr)
312+
{
313+
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
314+
ncol, nlay, ibnd-1, msk.ptr(), clwp.ptr(), reliq.ptr(),
315+
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
316+
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
317+
this->lut_asyliq_gpu.ptr(), optical_props.get_tau().ptr(), taussa.ptr(), taussag.ptr());
318+
}
319+
// Ice only
320+
else if (ciwp.ptr() != nullptr)
321+
{
322+
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
323+
ncol, nlay, ibnd-1, msk.ptr(), ciwp.ptr(), reice.ptr(),
324+
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
325+
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
326+
this->lut_asyice_gpu.ptr(), optical_props.get_tau().ptr(), taussa.ptr(), taussag.ptr());
327+
}
328+
329+
302330
constexpr Float eps = std::numeric_limits<Float>::epsilon();
303331
if ((ciwp.ptr() != nullptr) && (clwp.ptr() != nullptr))
304332
{
@@ -307,18 +335,20 @@ void Cloud_optics_rt::cloud_optics(
307335
optical_props.get_tau().ptr(), optical_props.get_ssa().ptr(), optical_props.get_g().ptr(),
308336
ltau.ptr(), ltaussa.ptr(), ltaussag.ptr(),
309337
itau.ptr(), itaussa.ptr(), itaussag.ptr());
310-
} else if(ciwp.ptr() == nullptr)
338+
}
339+
else if (clwp.ptr() != nullptr)
311340
{
312-
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
341+
store_single_phase_kernel<<<grid_gpu, block_gpu>>>(
313342
ncol, nlay, eps,
314343
optical_props.get_tau().ptr(), optical_props.get_ssa().ptr(), optical_props.get_g().ptr(),
315-
ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
316-
} else if (clwp.ptr() == nullptr)
344+
taussa.ptr(), taussag.ptr());
345+
}
346+
else if (ciwp.ptr() != nullptr)
317347
{
318-
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
348+
store_single_phase_kernel<<<grid_gpu, block_gpu>>>(
319349
ncol, nlay, eps,
320350
optical_props.get_tau().ptr(), optical_props.get_ssa().ptr(), optical_props.get_g().ptr(),
321-
itau.ptr(), itaussa.ptr(), itaussag.ptr());
351+
taussa.ptr(), taussag.ptr());
322352
}
323353

324354
}
@@ -330,19 +360,8 @@ void Cloud_optics_rt::cloud_optics(
330360
const Array_gpu<Float,2>& reliq, const Array_gpu<Float,2>& reice,
331361
Optical_props_1scl_rt& optical_props)
332362
{
333-
int ncol = -1;
334-
int nlay = -1;
335-
if (clwp.ptr() != nullptr)
336-
{
337-
ncol = clwp.dim(1);
338-
nlay = clwp.dim(2);
339-
Optical_props_1scl_rt clouds_liq(ncol, nlay, optical_props);
340-
} else if (ciwp.ptr() != nullptr)
341-
{
342-
ncol = ciwp.dim(1);
343-
nlay = ciwp.dim(2);
344-
Optical_props_1scl_rt clouds_ice(ncol, nlay, optical_props);
345-
}
363+
const int ncol = optical_props.get_tau().dim(1);
364+
const int nlay = optical_props.get_tau().dim(2);
346365

347366
// Set the mask.
348367
constexpr Float mask_min_value = Float(0.);
@@ -355,36 +374,58 @@ void Cloud_optics_rt::cloud_optics(
355374
dim3 grid_m_gpu(grid_col_m, grid_lay_m);
356375
dim3 block_m_gpu(block_col_m, block_lay_m);
357376

358-
// Temporary arrays for storage.
377+
// Temporary arrays for storage, liquid and ice seperately if both are present
359378
Array_gpu<Bool,2> liqmsk({0, 0});
360379
Array_gpu<Float,2> ltau ({0, 0});
361380
Array_gpu<Float,2> ltaussa ({0, 0});
362381
Array_gpu<Float,2> ltaussag({0, 0});
382+
363383
Array_gpu<Bool,2> icemsk({0, 0});
364384
Array_gpu<Float,2> itau ({0, 0});
365385
Array_gpu<Float,2> itaussa ({0, 0});
366386
Array_gpu<Float,2> itaussag({0, 0});
367-
368-
if (clwp.ptr() != nullptr)
387+
388+
// Temporary arrays for storage, only on set needed if either liquid or ice is present
389+
Array_gpu<Bool,2> msk({0, 0});
390+
Array_gpu<Float,2> taussa ({0, 0});
391+
Array_gpu<Float,2> taussag({0, 0});
392+
393+
if ((clwp.ptr() != nullptr) && (ciwp.ptr() != nullptr))
369394
{
370395
liqmsk.set_dims({ncol, nlay});
371396
ltau.set_dims({ncol, nlay});
372397
ltaussa.set_dims({ncol, nlay});
373398
ltaussag.set_dims({ncol, nlay});
374-
375-
set_mask<<<grid_m_gpu, block_m_gpu>>>(
376-
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());
377-
}
378-
if (ciwp.ptr() != nullptr)
379-
{
399+
380400
icemsk.set_dims({ncol, nlay});
381401
itau.set_dims({ncol, nlay});
382402
itaussa.set_dims({ncol, nlay});
383403
itaussag.set_dims({ncol, nlay});
384404

405+
set_mask<<<grid_m_gpu, block_m_gpu>>>(
406+
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());
407+
385408
set_mask<<<grid_m_gpu, block_m_gpu>>>(
386409
ncol, nlay, mask_min_value, icemsk.ptr(), ciwp.ptr());
387410
}
411+
else if (clwp.ptr() != nullptr)
412+
{
413+
msk.set_dims({ncol, nlay});
414+
taussa.set_dims({ncol, nlay});
415+
taussag.set_dims({ncol, nlay});
416+
417+
set_mask<<<grid_m_gpu, block_m_gpu>>>(
418+
ncol, nlay, mask_min_value, msk.ptr(), clwp.ptr());
419+
}
420+
else if (ciwp.ptr() != nullptr)
421+
{
422+
msk.set_dims({ncol, nlay});
423+
taussa.set_dims({ncol, nlay});
424+
taussag.set_dims({ncol, nlay});
425+
426+
set_mask<<<grid_m_gpu, block_m_gpu>>>(
427+
ncol, nlay, mask_min_value, msk.ptr(), ciwp.ptr());
428+
}
388429

389430
const int block_col = 64;
390431
const int block_lay = 1;
@@ -395,23 +436,39 @@ void Cloud_optics_rt::cloud_optics(
395436
dim3 grid_gpu(grid_col, grid_lay);
396437
dim3 block_gpu(block_col, block_lay);
397438

398-
// Liquid water
399-
if (clwp.ptr() != nullptr){
439+
// Liquid water and ice
440+
if ((clwp.ptr() != nullptr) && (ciwp.ptr() != nullptr))
441+
{
400442
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
401443
ncol, nlay, ibnd-1, liqmsk.ptr(), clwp.ptr(), reliq.ptr(),
402444
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
403445
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
404446
this->lut_asyliq_gpu.ptr(), ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
405-
}
406-
407-
// Ice.
408-
if (ciwp.ptr() != nullptr){
447+
409448
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
410449
ncol, nlay, ibnd-1, icemsk.ptr(), ciwp.ptr(), reice.ptr(),
411450
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
412451
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
413452
this->lut_asyice_gpu.ptr(), itau.ptr(), itaussa.ptr(), itaussag.ptr());
414453
}
454+
// Liquid only
455+
else if (clwp.ptr() != nullptr)
456+
{
457+
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
458+
ncol, nlay, ibnd-1, msk.ptr(), clwp.ptr(), reliq.ptr(),
459+
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
460+
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
461+
this->lut_asyliq_gpu.ptr(), optical_props.get_tau().ptr(), taussa.ptr(), taussag.ptr());
462+
}
463+
// Ice.
464+
else if (ciwp.ptr() != nullptr)
465+
{
466+
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
467+
ncol, nlay, ibnd-1, msk.ptr(), ciwp.ptr(), reice.ptr(),
468+
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
469+
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
470+
this->lut_asyice_gpu.ptr(), optical_props.get_tau().ptr(), taussa.ptr(), taussag.ptr());
471+
}
415472

416473
constexpr Float eps = std::numeric_limits<Float>::epsilon();
417474
if ((ciwp.ptr() != nullptr) && (clwp.ptr() != nullptr))
@@ -421,18 +478,18 @@ void Cloud_optics_rt::cloud_optics(
421478
optical_props.get_tau().ptr(),
422479
ltau.ptr(), ltaussa.ptr(),
423480
itau.ptr(), itaussa.ptr());
424-
} else if(ciwp.ptr() == nullptr)
481+
}
482+
else if(clwp.ptr() != nullptr)
425483
{
426-
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
484+
store_single_phase_kernel<<<grid_gpu, block_gpu>>>(
427485
ncol, nlay, eps,
428-
optical_props.get_tau().ptr(),
429-
ltau.ptr(), ltaussa.ptr());
430-
} else if(clwp.ptr() == nullptr)
486+
optical_props.get_tau().ptr(), taussa.ptr());
487+
}
488+
else if(ciwp.ptr() != nullptr)
431489
{
432-
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
490+
store_single_phase_kernel<<<grid_gpu, block_gpu>>>(
433491
ncol, nlay, eps,
434-
optical_props.get_tau().ptr(),
435-
itau.ptr(), itaussa.ptr());
492+
optical_props.get_tau().ptr(), taussa.ptr());
436493

437494
}
438495

0 commit comments

Comments
 (0)