@@ -86,6 +86,23 @@ namespace
8686 }
8787 }
8888
// Single-phase store kernel for the tau-only output (one cloud phase present).
//
// Launch layout: 2D grid; x indexes columns, y indexes layers. Threads that
// fall outside the ncol x nlay domain do nothing.
//
// Parameters:
//   ncol, nlay      extent of the column and layer dimensions.
//   tmin            accepted for signature parity with the other combine
//                   kernels; it is not read by this kernel.
//   tau             output, one value per (column, layer) cell.
//   l_or_i_tau      input tau of the single phase (liquid or ice).
//   l_or_i_taussa   input tau*ssa of the same phase.
//
// Each cell receives l_or_i_tau - l_or_i_taussa.
// NOTE(review): presumably this is the absorption optical depth
// (extinction minus scattering) -- confirm against the two-phase kernel.
__global__
void combine_and_store_kernel_single_phase(
        const int ncol, const int nlay, const Float tmin,
        Float* __restrict__ tau,
        const Float* __restrict__ l_or_i_tau, const Float* __restrict__ l_or_i_taussa)
{
    const int col = threadIdx.x + blockDim.x * blockIdx.x;
    const int lay = threadIdx.y + blockDim.y * blockIdx.y;

    // Guard the grid tail: the launch grid is rounded up to whole blocks.
    if (col >= ncol || lay >= nlay)
        return;

    // Column index varies fastest in the flattened arrays.
    const int cell = lay*ncol + col;

    tau[cell] = l_or_i_tau[cell] - l_or_i_taussa[cell];
}
105+
89106 __global__
90107 void combine_and_store_kernel (const int ncol, const int nlay, const Float tmin,
91108 Float* __restrict__ tau, Float* __restrict__ ssa, Float* __restrict__ g,
@@ -108,6 +125,28 @@ namespace
108125 }
109126 }
110127
// Single-phase store kernel for the tau/ssa/g outputs (one cloud phase present).
//
// Launch layout: 2D grid; x indexes columns, y indexes layers. Threads that
// fall outside the ncol x nlay domain do nothing.
//
// Parameters:
//   ncol, nlay       extent of the column and layer dimensions.
//   tmin             lower bound applied to each denominator before dividing.
//   tau, ssa, g      outputs, one value per (column, layer) cell.
//   l_or_i_tau       input tau of the single phase (liquid or ice).
//   l_or_i_taussa    input tau*ssa of the same phase.
//   l_or_i_taussag   input tau*ssa*g of the same phase.
//
// Per cell:
//   tau = l_or_i_tau
//   ssa = l_or_i_taussa  / max(l_or_i_tau,    tmin)
//   g   = l_or_i_taussag / max(l_or_i_taussa, tmin)
__global__
void combine_and_store_kernel_single_phase(
        const int ncol, const int nlay, const Float tmin,
        Float* __restrict__ tau, Float* __restrict__ ssa, Float* __restrict__ g,
        const Float* __restrict__ l_or_i_tau, const Float* __restrict__ l_or_i_taussa, const Float* __restrict__ l_or_i_taussag)
{
    const int col = threadIdx.x + blockDim.x * blockIdx.x;
    const int lay = threadIdx.y + blockDim.y * blockIdx.y;

    // Guard the grid tail: the launch grid is rounded up to whole blocks.
    if (col >= ncol || lay >= nlay)
        return;

    // Column index varies fastest in the flattened arrays.
    const int cell = lay*ncol + col;

    // Read all three inputs first, then write the outputs.
    const Float tau_in     = l_or_i_tau[cell];
    const Float taussa_in  = l_or_i_taussa[cell];
    const Float taussag_in = l_or_i_taussag[cell];

    tau[cell] = tau_in;
    // Clamping the denominators with tmin keeps the divisions finite
    // where the phase contributes nothing.
    ssa[cell] = taussa_in  / max(tau_in,    tmin);
    g[cell]   = taussag_in / max(taussa_in, tmin);
}
149+
111150 __global__
112151 void set_mask (const int ncol, const int nlay, const Float min_value,
113152 Bool* __restrict__ mask, const Float* __restrict__ values)
@@ -182,11 +221,19 @@ void Cloud_optics_rt::cloud_optics(
182221 const Array_gpu<Float,2 >& reliq, const Array_gpu<Float,2 >& reice,
183222 Optical_props_2str_rt& optical_props)
184223{
185- const int ncol = clwp.dim (1 );
186- const int nlay = clwp.dim (2 );
187-
188- Optical_props_2str_rt clouds_liq (ncol, nlay, optical_props);
189- Optical_props_2str_rt clouds_ice (ncol, nlay, optical_props);
224+ int ncol = -1 ;
225+ int nlay = -1 ;
226+ if (clwp.ptr () != nullptr )
227+ {
228+ ncol = clwp.dim (1 );
229+ nlay = clwp.dim (2 );
230+ Optical_props_2str_rt clouds_liq (ncol, nlay, optical_props);
231+ } else if (ciwp.ptr () != nullptr )
232+ {
233+ ncol = ciwp.dim (1 );
234+ nlay = ciwp.dim (2 );
235+ Optical_props_2str_rt clouds_ice (ncol, nlay, optical_props);
236+ }
190237
191238 // Set the mask.
192239 constexpr Float mask_min_value = Float (0 .);
@@ -199,53 +246,81 @@ void Cloud_optics_rt::cloud_optics(
199246 dim3 grid_m_gpu (grid_col_m, grid_lay_m);
200247 dim3 block_m_gpu (block_col_m, block_lay_m);
201248
202- Array_gpu<Bool,2 > liqmsk ({ncol, nlay});
203- set_mask<<<grid_m_gpu, block_m_gpu>>> (
204- ncol, nlay, mask_min_value, liqmsk.ptr (), clwp.ptr ());
205-
206- Array_gpu<Bool,2 > icemsk ({ncol, nlay});
207- set_mask<<<grid_m_gpu, block_m_gpu>>> (
208- ncol, nlay, mask_min_value, icemsk.ptr (), ciwp.ptr ());
209249
210250 // Temporary arrays for storage.
211- Array_gpu<Float,2 > ltau ({ncol, nlay});
212- Array_gpu<Float,2 > ltaussa ({ncol, nlay});
213- Array_gpu<Float,2 > ltaussag ({ncol, nlay});
214-
215- Array_gpu<Float,2 > itau ({ncol, nlay});
216- Array_gpu<Float,2 > itaussa ({ncol, nlay});
217- Array_gpu<Float,2 > itaussag ({ncol, nlay});
218-
251+ Array_gpu<Bool,2 > liqmsk ({0 , 0 });
252+ Array_gpu<Float,2 > ltau ({0 , 0 });
253+ Array_gpu<Float,2 > ltaussa ({0 , 0 });
254+ Array_gpu<Float,2 > ltaussag ({0 , 0 });
255+ Array_gpu<Bool,2 > icemsk ({0 , 0 });
256+ Array_gpu<Float,2 > itau ({0 , 0 });
257+ Array_gpu<Float,2 > itaussa ({0 , 0 });
258+ Array_gpu<Float,2 > itaussag ({0 , 0 });
259+ if (clwp.ptr () != nullptr ){
260+ liqmsk.set_dims ({ncol, nlay});
261+ ltau.set_dims ({ncol, nlay});
262+ ltaussa.set_dims ({ncol, nlay});
263+ ltaussag.set_dims ({ncol, nlay});
264+
265+ set_mask<<<grid_m_gpu, block_m_gpu>>> (
266+ ncol, nlay, mask_min_value, liqmsk.ptr (), clwp.ptr ());
267+ }
268+ if (ciwp.ptr () != nullptr ){
269+ icemsk.set_dims ({ncol, nlay});
270+ itau.set_dims ({ncol, nlay});
271+ itaussa.set_dims ({ncol, nlay});
272+ itaussag.set_dims ({ncol, nlay});
273+
274+ set_mask<<<grid_m_gpu, block_m_gpu>>> (
275+ ncol, nlay, mask_min_value, icemsk.ptr (), ciwp.ptr ());
276+ }
219277 const int block_col = 64 ;
220278 const int block_lay = 1 ;
221-
222279 const int grid_col = ncol/block_col + (ncol%block_col > 0 );
223280 const int grid_lay = nlay/block_lay + (nlay%block_lay > 0 );
224281
225282 dim3 grid_gpu (grid_col, grid_lay);
226283 dim3 block_gpu (block_col, block_lay);
227284
228285 // Liquid water
229- compute_from_table_kernel<<<grid_gpu, block_gpu>>> (
230- ncol, nlay, ibnd-1 , liqmsk.ptr (), clwp.ptr (), reliq.ptr (),
231- this ->liq_nsteps , this ->liq_step_size , this ->radliq_lwr ,
232- this ->lut_extliq_gpu .ptr (), this ->lut_ssaliq_gpu .ptr (),
233- this ->lut_asyliq_gpu .ptr (), ltau.ptr (), ltaussa.ptr (), ltaussag.ptr ());
286+ if (clwp.ptr () != nullptr ){
287+ compute_from_table_kernel<<<grid_gpu, block_gpu>>> (
288+ ncol, nlay, ibnd-1 , liqmsk.ptr (), clwp.ptr (), reliq.ptr (),
289+ this ->liq_nsteps , this ->liq_step_size , this ->radliq_lwr ,
290+ this ->lut_extliq_gpu .ptr (), this ->lut_ssaliq_gpu .ptr (),
291+ this ->lut_asyliq_gpu .ptr (), ltau.ptr (), ltaussa.ptr (), ltaussag.ptr ());
292+ }
234293
235294 // Ice.
236- compute_from_table_kernel<<<grid_gpu, block_gpu>>> (
237- ncol, nlay, ibnd-1 , icemsk.ptr (), ciwp.ptr (), reice.ptr (),
238- this ->ice_nsteps , this ->ice_step_size , this ->radice_lwr ,
239- this ->lut_extice_gpu .ptr (), this ->lut_ssaice_gpu .ptr (),
240- this ->lut_asyice_gpu .ptr (), itau.ptr (), itaussa.ptr (), itaussag.ptr ());
241-
295+ if (ciwp.ptr () != nullptr ){
296+ compute_from_table_kernel<<<grid_gpu, block_gpu>>> (
297+ ncol, nlay, ibnd-1 , icemsk.ptr (), ciwp.ptr (), reice.ptr (),
298+ this ->ice_nsteps , this ->ice_step_size , this ->radice_lwr ,
299+ this ->lut_extice_gpu .ptr (), this ->lut_ssaice_gpu .ptr (),
300+ this ->lut_asyice_gpu .ptr (), itau.ptr (), itaussa.ptr (), itaussag.ptr ());
301+ }
242302 constexpr Float eps = std::numeric_limits<Float>::epsilon ();
243-
244- combine_and_store_kernel<<<grid_gpu, block_gpu>>> (
303+ if ((ciwp.ptr () != nullptr ) && (clwp.ptr () != nullptr ))
304+ {
305+ combine_and_store_kernel<<<grid_gpu, block_gpu>>> (
245306 ncol, nlay, eps,
246307 optical_props.get_tau ().ptr (), optical_props.get_ssa ().ptr (), optical_props.get_g ().ptr (),
247308 ltau.ptr (), ltaussa.ptr (), ltaussag.ptr (),
248309 itau.ptr (), itaussa.ptr (), itaussag.ptr ());
310+ } else if (ciwp.ptr () == nullptr )
311+ {
312+ combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>> (
313+ ncol, nlay, eps,
314+ optical_props.get_tau ().ptr (), optical_props.get_ssa ().ptr (), optical_props.get_g ().ptr (),
315+ ltau.ptr (), ltaussa.ptr (), ltaussag.ptr ());
316+ } else if (clwp.ptr () == nullptr )
317+ {
318+ combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>> (
319+ ncol, nlay, eps,
320+ optical_props.get_tau ().ptr (), optical_props.get_ssa ().ptr (), optical_props.get_g ().ptr (),
321+ itau.ptr (), itaussa.ptr (), itaussag.ptr ());
322+ }
323+
249324}
250325
251326// 1scl variant of cloud optics.
@@ -255,12 +330,20 @@ void Cloud_optics_rt::cloud_optics(
255330 const Array_gpu<Float,2 >& reliq, const Array_gpu<Float,2 >& reice,
256331 Optical_props_1scl_rt& optical_props)
257332{
258- const int ncol = clwp.dim (1 );
259- const int nlay = clwp.dim (2 );
260-
261- Optical_props_1scl_rt clouds_liq (ncol, nlay, optical_props);
262- Optical_props_1scl_rt clouds_ice (ncol, nlay, optical_props);
263-
333+ int ncol = -1 ;
334+ int nlay = -1 ;
335+ if (clwp.ptr () != nullptr )
336+ {
337+ ncol = clwp.dim (1 );
338+ nlay = clwp.dim (2 );
339+ Optical_props_1scl_rt clouds_liq (ncol, nlay, optical_props);
340+ } else if (ciwp.ptr () != nullptr )
341+ {
342+ ncol = ciwp.dim (1 );
343+ nlay = ciwp.dim (2 );
344+ Optical_props_1scl_rt clouds_ice (ncol, nlay, optical_props);
345+ }
346+
264347 // Set the mask.
265348 constexpr Float mask_min_value = Float (0 .);
266349 const int block_col_m = 16 ;
@@ -272,22 +355,36 @@ void Cloud_optics_rt::cloud_optics(
272355 dim3 grid_m_gpu (grid_col_m, grid_lay_m);
273356 dim3 block_m_gpu (block_col_m, block_lay_m);
274357
275- Array_gpu<Bool,2 > liqmsk ({ncol, nlay});
276- set_mask<<<grid_m_gpu, block_m_gpu>>> (
277- ncol, nlay, mask_min_value, liqmsk.ptr (), clwp.ptr ());
278-
279- Array_gpu<Bool,2 > icemsk ({ncol, nlay});
280- set_mask<<<grid_m_gpu, block_m_gpu>>> (
281- ncol, nlay, mask_min_value, icemsk.ptr (), ciwp.ptr ());
282-
283358 // Temporary arrays for storage.
284- Array_gpu<Float,2 > ltau ({ncol, nlay});
285- Array_gpu<Float,2 > ltaussa ({ncol, nlay});
286- Array_gpu<Float,2 > ltaussag ({ncol, nlay});
359+ Array_gpu<Bool,2 > liqmsk ({0 , 0 });
360+ Array_gpu<Float,2 > ltau ({0 , 0 });
361+ Array_gpu<Float,2 > ltaussa ({0 , 0 });
362+ Array_gpu<Float,2 > ltaussag ({0 , 0 });
363+ Array_gpu<Bool,2 > icemsk ({0 , 0 });
364+ Array_gpu<Float,2 > itau ({0 , 0 });
365+ Array_gpu<Float,2 > itaussa ({0 , 0 });
366+ Array_gpu<Float,2 > itaussag ({0 , 0 });
367+
368+ if (clwp.ptr () != nullptr )
369+ {
370+ liqmsk.set_dims ({ncol, nlay});
371+ ltau.set_dims ({ncol, nlay});
372+ ltaussa.set_dims ({ncol, nlay});
373+ ltaussag.set_dims ({ncol, nlay});
374+
375+ set_mask<<<grid_m_gpu, block_m_gpu>>> (
376+ ncol, nlay, mask_min_value, liqmsk.ptr (), clwp.ptr ());
377+ }
378+ if (ciwp.ptr () != nullptr )
379+ {
380+ icemsk.set_dims ({ncol, nlay});
381+ itau.set_dims ({ncol, nlay});
382+ itaussa.set_dims ({ncol, nlay});
383+ itaussag.set_dims ({ncol, nlay});
287384
288- Array_gpu<Float, 2 > itau ({ncol, nlay});
289- Array_gpu<Float, 2 > itaussa ({ ncol, nlay} );
290- Array_gpu<Float, 2 > itaussag ({ncol, nlay});
385+ set_mask <<<grid_m_gpu, block_m_gpu>>> (
386+ ncol, nlay, mask_min_value, icemsk. ptr (), ciwp. ptr () );
387+ }
291388
292389 const int block_col = 64 ;
293390 const int block_lay = 1 ;
@@ -299,25 +396,45 @@ void Cloud_optics_rt::cloud_optics(
299396 dim3 block_gpu (block_col, block_lay);
300397
301398 // Liquid water
302- compute_from_table_kernel<<<grid_gpu, block_gpu>>> (
303- ncol, nlay, ibnd-1 , liqmsk.ptr (), clwp.ptr (), reliq.ptr (),
304- this ->liq_nsteps , this ->liq_step_size , this ->radliq_lwr ,
305- this ->lut_extliq_gpu .ptr (), this ->lut_ssaliq_gpu .ptr (),
306- this ->lut_asyliq_gpu .ptr (), ltau.ptr (), ltaussa.ptr (), ltaussag.ptr ());
399+ if (clwp.ptr () != nullptr ){
400+ compute_from_table_kernel<<<grid_gpu, block_gpu>>> (
401+ ncol, nlay, ibnd-1 , liqmsk.ptr (), clwp.ptr (), reliq.ptr (),
402+ this ->liq_nsteps , this ->liq_step_size , this ->radliq_lwr ,
403+ this ->lut_extliq_gpu .ptr (), this ->lut_ssaliq_gpu .ptr (),
404+ this ->lut_asyliq_gpu .ptr (), ltau.ptr (), ltaussa.ptr (), ltaussag.ptr ());
405+ }
307406
308407 // Ice.
309- compute_from_table_kernel<<<grid_gpu, block_gpu>>> (
310- ncol, nlay, ibnd-1 , icemsk.ptr (), ciwp.ptr (), reice.ptr (),
311- this ->ice_nsteps , this ->ice_step_size , this ->radice_lwr ,
312- this ->lut_extice_gpu .ptr (), this ->lut_ssaice_gpu .ptr (),
313- this ->lut_asyice_gpu .ptr (), itau.ptr (), itaussa.ptr (), itaussag.ptr ());
408+ if (ciwp.ptr () != nullptr ){
409+ compute_from_table_kernel<<<grid_gpu, block_gpu>>> (
410+ ncol, nlay, ibnd-1 , icemsk.ptr (), ciwp.ptr (), reice.ptr (),
411+ this ->ice_nsteps , this ->ice_step_size , this ->radice_lwr ,
412+ this ->lut_extice_gpu .ptr (), this ->lut_ssaice_gpu .ptr (),
413+ this ->lut_asyice_gpu .ptr (), itau.ptr (), itaussa.ptr (), itaussag.ptr ());
414+ }
314415
315416 constexpr Float eps = std::numeric_limits<Float>::epsilon ();
316-
317- combine_and_store_kernel<<<grid_gpu, block_gpu>>> (
417+ if ((ciwp.ptr () != nullptr ) && (clwp.ptr () != nullptr ))
418+ {
419+ combine_and_store_kernel<<<grid_gpu, block_gpu>>> (
318420 ncol, nlay, eps,
319421 optical_props.get_tau ().ptr (),
320422 ltau.ptr (), ltaussa.ptr (),
321423 itau.ptr (), itaussa.ptr ());
424+ } else if (ciwp.ptr () == nullptr )
425+ {
426+ combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>> (
427+ ncol, nlay, eps,
428+ optical_props.get_tau ().ptr (),
429+ ltau.ptr (), ltaussa.ptr ());
430+ } else if (clwp.ptr () == nullptr )
431+ {
432+ combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>> (
433+ ncol, nlay, eps,
434+ optical_props.get_tau ().ptr (),
435+ itau.ptr (), itaussa.ptr ());
436+
437+ }
438+
322439}
323440
0 commit comments