3434#include " Fluxes_rt.h"
3535#include " Rte_lw_rt.h"
3636#include " Rte_sw_rt.h"
37+ #include " subset_kernel_launcher_cuda.h"
3738#include " rrtmgp_kernel_launcher_cuda_rt.h"
3839#include " gpt_combine_kernel_launcher_cuda_rt.h"
3940
@@ -571,8 +572,8 @@ void Radiation_solver_shortwave::solve_gpu(
571572 const Array_gpu<int ,1 >& kn_grid_dims,
572573 Array_gpu<Float,2 >& col_dry,
573574 const Array_gpu<Float,2 >& sfc_alb_dir, const Array_gpu<Float,2 >& sfc_alb_dif,
574- const Array_gpu<Float,1 >& tsi_scaling,
575- const Array_gpu<Float,1 >& mu0, const Array_gpu<Float,1 >& azi,
575+ const Array_gpu<Float,1 >& tsi_scaling,
576+ const Array_gpu<Float,1 >& mu0, const Array_gpu<Float,1 >& azi,
576577 const Array_gpu<Float,2 >& lwp, const Array_gpu<Float,2 >& iwp,
577578 const Array_gpu<Float,2 >& rel, const Array_gpu<Float,2 >& rei,
578579 Array_gpu<Float,3 >& tau, Array_gpu<Float,3 >& ssa, Array_gpu<Float,3 >& g,
@@ -651,6 +652,7 @@ void Radiation_solver_shortwave::solve_gpu(
651652 }
652653 }
653654
655+ /*
654656 kdist_gpu->gas_optics(
655657 igpt-1,
656658 p_lay,
@@ -660,7 +662,63 @@ void Radiation_solver_shortwave::solve_gpu(
660662 optical_props,
661663 toa_src,
662664 col_dry);
663- scaling_to_subset (n_col, n_gpt, toa_src, tsi_scaling);
665+ */
666+
667+ // Run the gas optics in blocks of columns, because of memory constraints.
668+ constexpr int n_col_block = 1 <<14 ; // 2^14
669+
670+ Array_gpu<Float,1 > toa_src_temp ({n_col_block});
671+
672+ auto gas_optics_subset = [&](
673+ const int col_s, const int col_e, const int n_col_subset,
674+ std::unique_ptr<Optical_props_arry_rt>& optical_props_subset)
675+ {
676+ Gas_concs_gpu gas_concs_subset (gas_concs, col_s, n_col_subset);
677+ // Run the gas_optics on a subset.
678+ kdist_gpu->gas_optics (
679+ igpt-1 ,
680+ p_lay.subset ({{ {col_s, col_e}, {1 , n_lay} }}),
681+ p_lev.subset ({{ {col_s, col_e}, {1 , n_lev} }}),
682+ t_lay.subset ({{ {col_s, col_e}, {1 , n_lay} }}),
683+ gas_concs_subset,
684+ optical_props_subset,
685+ toa_src_temp,
686+ col_dry.subset ({{ {col_s, col_e}, {1 , n_lay} }}));
687+ subset_kernel_launcher_cuda::get_from_subset (
688+ n_col, n_lay, n_col_subset, col_s,
689+ optical_props->get_tau ().ptr (), optical_props->get_ssa ().ptr (), optical_props->get_g ().ptr (),
690+ optical_props_subset->get_tau ().ptr (), optical_props_subset->get_ssa ().ptr (), optical_props_subset->get_g ().ptr ());
691+
692+ };
693+
694+ const int n_blocks = n_col / n_col_block;
695+ const int n_col_residual = n_col % n_col_block;
696+
697+ std::unique_ptr<Optical_props_arry_rt> optical_props_block =
698+ std::make_unique<Optical_props_2str_rt>(n_col_block, n_lay, *kdist_gpu);
699+
700+ for (int n=0 ; n<n_blocks; ++n)
701+ {
702+ const int col_s = n*n_col_block + 1 ;
703+ const int col_e = (n+1 )*n_col_block;
704+
705+ gas_optics_subset (col_s, col_e, n_col_block, optical_props_block);
706+ }
707+
708+ optical_props_block.reset ();
709+
710+ if (n_col_residual > 0 )
711+ {
712+ std::unique_ptr<Optical_props_arry_rt> optical_props_residual =
713+ std::make_unique<Optical_props_2str_rt>(n_col_residual, n_lay, *kdist_gpu);
714+
715+ const int col_s = n_blocks*n_col_block + 1 ;
716+ const int col_e = n_col;
717+
718+ gas_optics_subset (col_s, col_e, n_col_residual, optical_props_residual);
719+ }
720+
721+ toa_src.fill (toa_src_temp ({1 }) * tsi_scaling ({1 }));
664722
665723 if (switch_cloud_optics)
666724 {
@@ -673,7 +731,7 @@ void Radiation_solver_shortwave::solve_gpu(
673731 *cloud_optical_props);
674732
675733
676- cloud_optical_props->delta_scale ();
734+ // cloud_optical_props->delta_scale();
677735
678736 // Add the cloud optical props to the gas optical properties.
679737 add_to (
0 commit comments