Skip to content

Commit 7a73d12

Browse files
author
Menno Veerman
committed
add new broadband mie LUTs (with three different sizes), copy to mie_lut_broadband.nc to use. Also use seperate dimension sizes for the cdf and phase LUTs where currently appropriate in the code. Also make sure than the two-stream code is actually ignored when running with --no-two-stream
1 parent c1bb890 commit 7a73d12

File tree

8 files changed

+92
-76
lines changed

8 files changed

+92
-76
lines changed
7.1 MB
Binary file not shown.
7.91 MB
Binary file not shown.

data/mie_lut_broadband_ncdf-876.nc

5.84 MB
Binary file not shown.

include_rt_kernels/raytracer_kernels_bw.h

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -117,20 +117,21 @@ void ray_tracer_kernel_bw(
117117
const Float* __restrict__ mie_ang,
118118
const Float* __restrict__ mie_phase,
119119
const Float* __restrict__ mie_phase_ang,
120-
const int mie_table_size);
121-
120+
const int mie_cdf_table_size,
121+
const int mie_phase_table_size);
122+
122123
__global__
123124
void accumulate_clouds_kernel(
124-
const Float* __restrict__ lwp,
125-
const Float* __restrict__ iwp,
125+
const Float* __restrict__ lwp,
126+
const Float* __restrict__ iwp,
126127
const Float* __restrict__ tau_cloud,
127128
const Vector<Float> grid_d,
128129
const Vector<Float> grid_size,
129130
const Vector<int> grid_cells,
130-
Float* __restrict__ liwp_cam,
131-
Float* __restrict__ tauc_cam,
132-
Float* __restrict__ dist_cam,
133-
Float* __restrict__ zen_cam,
131+
Float* __restrict__ liwp_cam,
132+
Float* __restrict__ tauc_cam,
133+
Float* __restrict__ dist_cam,
134+
Float* __restrict__ zen_cam,
134135
const Camera camera);
135136

136137
#endif

src_cuda_rt/Raytracer_bw.cu

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -99,17 +99,17 @@ namespace
9999

100100
const int x0 = grid_x*fx;
101101
const int x1 = min(grid_cells.x-1, int(floor((grid_x+1)*fx)));
102-
102+
103103
const int y0 = grid_y*fy;
104104
const int y1 = min(grid_cells.y-1, int(floor((grid_y+1)*fy)));
105-
105+
106106
const int z0 = grid_z*fz;
107107
const int z1 = min(grid_cells.z-1, int(floor((grid_z+1)*fz)));
108108

109109
const int ijk_grid = grid_x + grid_y*kn_grid.x + grid_z*kn_grid.y*kn_grid.x;
110110
Float k_null_min = Float(1e15); // just a ridicilously high value
111111
Float k_null_max = Float(0.);
112-
112+
113113
for (int k=z0; k<=z1; ++k)
114114
for (int j=y0; j<=y1; ++j)
115115
for (int i=x0; i<=x1; ++i)
@@ -460,7 +460,7 @@ void Raytracer_bw::trace_rays(
460460

461461
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, camera_count.ptr());
462462
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, shot_count.ptr());
463-
463+
464464
counter.fill(Int(0));
465465

466466
// domain sizes
@@ -473,9 +473,10 @@ void Raytracer_bw::trace_rays(
473473

474474
dim3 grid{bw_kernel_grid}, block{bw_kernel_block};
475475

476-
const int mie_table_size = mie_cdf.size();
476+
const int mie_cdf_table_size = mie_cdf.size();
477+
const int mie_phase_table_size = mie_phase_ang.size();
477478

478-
ray_tracer_kernel_bw<<<grid, block, nbg*sizeof(Float) + 2 * sizeof(Float)*mie_table_size>>>(
479+
ray_tracer_kernel_bw<<<grid, block, nbg*sizeof(Float)+ sizeof(Float)*(mie_cdf_table_size+mie_phase_table_size)>>>(
479480
igpt-1,
480481
photons_per_pixel, k_null_grid.ptr(),
481482
camera_count.ptr(),
@@ -491,7 +492,9 @@ void Raytracer_bw::trace_rays(
491492
grid_size, grid_d, grid_cells, kn_grid,
492493
sun_direction, camera, nbg,
493494
mie_cdf.ptr(), mie_ang.ptr(),
494-
mie_phase.ptr(), mie_phase_ang.ptr(), mie_table_size);
495+
mie_phase.ptr(), mie_phase_ang.ptr(),
496+
mie_cdf_table_size,
497+
mie_phase_table_size);
495498

496499
//// convert counts to fluxes
497500
const int block_cam_x = 8;
@@ -621,9 +624,10 @@ void Raytracer_bw::trace_rays_bb(
621624

622625
dim3 grid{bw_kernel_grid}, block{bw_kernel_block};
623626

624-
const int mie_table_size = mie_cdf.size();
627+
const int mie_cdf_table_size = mie_cdf.size();
628+
const int mie_phase_table_size = mie_phase_ang.size();
625629

626-
ray_tracer_kernel_bw<<<grid, block, nbg*sizeof(Float)+ 2 * sizeof(Float)*mie_table_size>>>(
630+
ray_tracer_kernel_bw<<<grid, block, nbg*sizeof(Float)+ sizeof(Float)*(mie_cdf_table_size+mie_phase_table_size)>>>(
627631
igpt-1,
628632
photons_per_pixel, k_null_grid.ptr(),
629633
camera_count.ptr(),
@@ -640,7 +644,8 @@ void Raytracer_bw::trace_rays_bb(
640644
sun_direction, camera, nbg,
641645
mie_cdf.ptr(), mie_ang.ptr(),
642646
mie_phase.ptr(), mie_phase_ang.ptr(),
643-
mie_table_size);
647+
mie_cdf_table_size,
648+
mie_phase_table_size);
644649

645650
//// convert counts to fluxes
646651
const int block_cam_x = 8;
@@ -676,7 +681,7 @@ void Raytracer_bw::accumulate_clouds(
676681
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, liwp_cam.ptr());
677682
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, tauc_cam.ptr());
678683
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, dist_cam.ptr());
679-
684+
680685
const int n_pix = camera.nx * camera.ny;
681686
const int n_block = std::min(n_pix, 512);
682687
const int n_grid = std::ceil(Float(n_pix)/n_block);
@@ -686,7 +691,7 @@ void Raytracer_bw::accumulate_clouds(
686691

687692
// domain sizes
688693
const Vector<Float> grid_size = grid_d * grid_cells;
689-
694+
690695
accumulate_clouds_kernel<<<grid, block>>>(
691696
lwp.ptr(),
692697
iwp.ptr(),
@@ -703,12 +708,12 @@ void Raytracer_bw::accumulate_clouds(
703708
}
704709

705710

706-
707-
708-
709-
710-
711-
712-
713-
714-
711+
712+
713+
714+
715+
716+
717+
718+
719+

src_kernels_cuda_rt/raytracer_kernels_bw.cu

Lines changed: 34 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ namespace
235235
atomicAdd(&camera_count[ij_cam], weight * trans_sun);
236236
}
237237
atomicAdd(&camera_shot[ij_cam], Float(1.));
238-
238+
239239
}
240240

241241
__device__
@@ -298,19 +298,23 @@ void ray_tracer_kernel_bw(
298298
const Float* __restrict__ mie_ang,
299299
const Float* __restrict__ mie_phase,
300300
const Float* __restrict__ mie_phase_ang,
301-
const int mie_table_size)
301+
const int mie_cdf_table_size,
302+
const int mie_phase_table_size)
302303
{
303304
extern __shared__ Float shared_arrays[];
304305
Float* mie_cdf_shared = &shared_arrays[0];
305-
Float* mie_phase_ang_shared = &shared_arrays[mie_table_size];
306-
Float* bg_tau_cum = &shared_arrays[2*mie_table_size];
306+
Float* mie_phase_ang_shared = &shared_arrays[mie_phase_table_size];
307+
Float* bg_tau_cum = &shared_arrays[mie_phase_table_size+mie_cdf_table_size];
307308
if (threadIdx.x==0)
308309
{
309-
if (mie_table_size > 0)
310+
if (mie_cdf_table_size > 0)
310311
{
311-
for (int mie_i=0; mie_i<mie_table_size; ++mie_i)
312+
for (int mie_i=0; mie_i<mie_cdf_table_size; ++mie_i)
312313
{
313314
mie_cdf_shared[mie_i] = mie_cdf[mie_i];
315+
}
316+
for (int mie_i=0; mie_i<mie_phase_table_size; ++mie_i)
317+
{
314318
mie_phase_ang_shared[mie_i] = mie_phase_ang[mie_i];
315319
}
316320
}
@@ -324,11 +328,11 @@ void ray_tracer_kernel_bw(
324328
}
325329

326330
__syncthreads();
327-
331+
328332
Vector<Float> surface_normal = {0, 0, 1};
329-
333+
330334
const int n = blockDim.x * blockIdx.x + threadIdx.x;
331-
335+
332336
const Float bg_transmissivity = exp(-bg_tau_cum[0]);
333337

334338
const Vector<Float> kn_grid_d = grid_size / kn_grid;
@@ -338,7 +342,7 @@ void ray_tracer_kernel_bw(
338342

339343
const Float s_min = max(max(grid_size.z, grid_size.x), grid_size.y) * Float_epsilon;
340344
const Float s_min_bg = max(max(grid_size.x, grid_size.y), z_top) * Float_epsilon;
341-
345+
342346
while (counter[0] < camera.npix*photons_per_pixel)
343347
{
344348
const Int count = atomicAdd(&counter[0], 1);
@@ -677,19 +681,19 @@ void ray_tracer_kernel_bw(
677681
break;
678682
}
679683
const Float cos_scat = scatter_type == 0 ? rayleigh(rng()) : // gases -> rayleigh,
680-
1 ? ( (mie_table_size > 0) //clouds: Mie or HG
681-
? cos( mie_sample_angle(mie_cdf_shared, mie_ang, rng(), r_eff[ijk], mie_table_size) )
684+
1 ? ( (mie_cdf_table_size > 0) //clouds: Mie or HG
685+
? cos( mie_sample_angle(mie_cdf_shared, mie_ang, rng(), r_eff[ijk], mie_cdf_table_size) )
682686
: henyey(g, rng()))
683687
: henyey(g, rng()); //aerosols
684688
const Float sin_scat = max(Float(0.), sqrt(Float(1.) - cos_scat*cos_scat + Float_epsilon));
685689

686690
// SUN SCATTERING GOES HERE
687691
const Phase_kind kind = scatter_type == 0 ? Phase_kind::Rayleigh :
688-
1 ? (mie_table_size > 0)
692+
1 ? (mie_phase_table_size > 0)
689693
? Phase_kind::Mie
690694
: Phase_kind::HG
691695
: Phase_kind::HG;
692-
const Float p_sun = probability_from_sun(photon, sun_direction, sun_solid_angle, g, mie_phase_ang_shared, mie_phase, r_eff[ijk], mie_table_size,
696+
const Float p_sun = probability_from_sun(photon, sun_direction, sun_solid_angle, g, mie_phase_ang_shared, mie_phase, r_eff[ijk], mie_phase_table_size,
693697
surface_normal, kind);
694698
const Float trans_sun = transmission_direct_sun(photon,n,rng,sun_direction,
695699
k_null_grid,k_ext,
@@ -736,23 +740,23 @@ void ray_tracer_kernel_bw(
736740
}
737741
__global__
738742
void accumulate_clouds_kernel(
739-
const Float* __restrict__ lwp,
740-
const Float* __restrict__ iwp,
741-
const Float* __restrict__ tau_cloud,
743+
const Float* __restrict__ lwp,
744+
const Float* __restrict__ iwp,
745+
const Float* __restrict__ tau_cloud,
742746
const Vector<Float> grid_d,
743747
const Vector<Float> grid_size,
744748
const Vector<int> grid_cells,
745-
Float* __restrict__ liwp_cam,
746-
Float* __restrict__ tauc_cam,
747-
Float* __restrict__ dist_cam,
748-
Float* __restrict__ zen_cam,
749+
Float* __restrict__ liwp_cam,
750+
Float* __restrict__ tauc_cam,
751+
Float* __restrict__ dist_cam,
752+
Float* __restrict__ zen_cam,
749753
const Camera camera)
750754
{
751755
const int pix = blockDim.x * blockIdx.x + threadIdx.x;
752756
const Float s_eps = max(max(grid_size.z, grid_size.x), grid_size.y) * Float_epsilon;
753-
Vector<Float> direction;
757+
Vector<Float> direction;
754758
Vector<Float> position;
755-
759+
756760
if (pix < camera.nx * camera.ny)
757761
{
758762
Float liwp_sum = 0;
@@ -778,13 +782,13 @@ void ray_tracer_kernel_bw(
778782
{
779783
direction = normalize(camera.cam_width * (Float(2.)*i-Float(1.0)) + camera.cam_height * (Float(2.)*j-Float(1.0)) + camera.cam_depth);
780784
}
781-
785+
782786
// first bring photon to top of dynamical domain
783787
if ((position.z >= (grid_size.z - s_eps)) && (direction.z < Float(0.)))
784788
{
785789
const Float s = abs((position.z - grid_size.z)/direction.z);
786790
position = position + direction * s - s_eps;
787-
791+
788792
// Cyclic boundary condition in x.
789793
position.x = fmod(position.x, grid_size.x);
790794
if (position.x < Float(0.))
@@ -802,26 +806,26 @@ void ray_tracer_kernel_bw(
802806
const int j = float_to_int(position.y, grid_d.y, grid_cells.y);
803807
const int k = float_to_int(position.z, grid_d.z, grid_cells.z);
804808
const int ijk = i + j*grid_cells.x + k*grid_cells.x*grid_cells.y;
805-
809+
806810
const Float sx = abs((direction.x > 0) ? ((i+1) * grid_d.x - position.x)/direction.x : (i*grid_d.x - position.x)/direction.x);
807811
const Float sy = abs((direction.y > 0) ? ((j+1) * grid_d.y - position.y)/direction.y : (j*grid_d.y - position.y)/direction.y);
808812
const Float sz = abs((direction.z > 0) ? ((k+1) * grid_d.z - position.z)/direction.z : (k*grid_d.z - position.z)/direction.z);
809813
const Float s_min = min(sx, min(sy, sz));
810-
814+
811815
liwp_sum += s_min * (lwp[ijk] + iwp[ijk]);
812816
tauc_sum += s_min * tau_cloud[ijk];
813817
if (!reached_cloud)
814818
{
815819
dist += s_min;
816820
reached_cloud = tau_cloud[ijk] > 0;
817821
}
818-
822+
819823
position = position + direction * s_min;
820824

821825
position.x += direction.x >= 0 ? s_eps : -s_eps;
822826
position.y += direction.y >= 0 ? s_eps : -s_eps;
823827
position.z += direction.z >= 0 ? s_eps : -s_eps;
824-
828+
825829
// Cyclic boundary condition in x.
826830
position.x = fmod(position.x, grid_size.x);
827831
if (position.x < Float(0.))
@@ -833,7 +837,7 @@ void ray_tracer_kernel_bw(
833837
position.y += grid_size.y;
834838

835839
}
836-
840+
837841
// divide out initial layer thicknes, equivalent to first converting lwp (g/m2) to lwc (g/m3) or optical depth to k_ext(1/m)
838842
liwp_cam[pix] = liwp_sum / grid_d.z;
839843
tauc_cam[pix] = tauc_sum / grid_d.z;

src_test/Radiation_solver_bw.cu

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -747,9 +747,10 @@ void Radiation_solver_shortwave::load_mie_tables(
747747
const int n_bnd_sw = this->get_n_bnd_gpu();
748748
const int n_re = mie_nc.get_dimension_size("r_eff");
749749
const int n_mie = mie_nc.get_dimension_size("n_ang");
750+
const int n_mie_cdf = mie_nc.get_dimension_size("n_ang_cdf");
750751

751-
Array<Float,3> mie_cdf(mie_nc.get_variable<Float>("phase_cdf", {n_bnd_sw, n_mie}), {n_mie, 1, n_bnd_sw});
752-
Array<Float,4> mie_ang(mie_nc.get_variable<Float>("phase_cdf_angle", {n_bnd_sw, n_re, n_mie}), {n_mie, n_re, 1, n_bnd_sw});
752+
Array<Float,3> mie_cdf(mie_nc.get_variable<Float>("phase_cdf", {n_bnd_sw, n_mie_cdf}), {n_mie, 1, n_bnd_sw});
753+
Array<Float,4> mie_ang(mie_nc.get_variable<Float>("phase_cdf_angle", {n_bnd_sw, n_re, n_mie_cdf}), {n_mie_cdf, n_re, 1, n_bnd_sw});
753754

754755
Array<Float,4> mie_phase(mie_nc.get_variable<Float>("phase", {n_bnd_sw, n_re, n_mie}), {n_mie, n_re, 1, n_bnd_sw});
755756
Array<Float,1> mie_phase_ang(mie_nc.get_variable<Float>("phase_angle", {n_mie}), {n_mie});
@@ -1137,7 +1138,8 @@ void Radiation_solver_shortwave::solve_gpu_bb(
11371138
const int n_gpt = this->kdist_gpu->get_ngpt();
11381139
const int n_bnd = this->kdist_gpu->get_nband();
11391140

1140-
const int n_mie = (switch_cloud_mie) ? this->mie_angs_bb.dim(1) : 0;
1141+
const int n_mie = (switch_cloud_mie) ? this->mie_phase_angs_bb.dim(1) : 0;
1142+
const int n_mie_cdf = (switch_cloud_mie) ? this->mie_angs_bb.dim(1) : 0;
11411143
const int n_re = (switch_cloud_mie) ? this->mie_angs_bb.dim(2) : 0;
11421144

11431145
const int cam_nx = radiance.dim(1);
@@ -1296,8 +1298,8 @@ void Radiation_solver_shortwave::solve_gpu_bb(
12961298

12971299
if (switch_cloud_mie)
12981300
{
1299-
mie_cdfs_sub = mie_cdfs_bb.subset({{ {1, n_mie}, {1,1}, {band, band} }});
1300-
mie_angs_sub = mie_angs_bb.subset({{ {1, n_mie}, {1, n_re}, {1,1}, {band, band} }});
1301+
mie_cdfs_sub = mie_cdfs_bb.subset({{ {1, n_mie_cdf}, {1,1}, {band, band} }});
1302+
mie_angs_sub = mie_angs_bb.subset({{ {1, n_mie_cdf}, {1, n_re}, {1,1}, {band, band} }});
13011303
mie_phase_sub = mie_phase_bb.subset({{ {1, n_mie}, {1, n_re}, {1,1}, {band, band} }});
13021304
}
13031305

src_test/Radiation_solver_rt.cu

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,7 @@ void Radiation_solver_shortwave::load_mie_tables(
554554
{
555555
Netcdf_file mie_nc(file_name_mie, Netcdf_mode::Read);
556556
const int n_re = mie_nc.get_dimension_size("r_eff");
557-
const int n_mie = mie_nc.get_dimension_size("n_ang");
557+
const int n_mie = mie_nc.get_dimension_size("n_ang_cdf");
558558

559559
const int n_bnd_sw = this->get_n_bnd_gpu();
560560
Array<Float,2> mie_cdf(mie_nc.get_variable<Float>("phase_cdf", {n_bnd_sw, n_mie}), {n_mie, n_bnd_sw});
@@ -784,17 +784,21 @@ void Radiation_solver_shortwave::solve_gpu(
784784
std::unique_ptr<Fluxes_broadband_rt> fluxes =
785785
std::make_unique<Fluxes_broadband_rt>(grid_cells.x, grid_cells.y, grid_cells.z, n_lev);
786786

787-
rte_sw.rte_sw(
788-
optical_props,
789-
top_at_1,
790-
mu0,
791-
toa_src,
792-
sfc_alb_dir.subset({{ {band, band}, {1, n_col}}}),
793-
sfc_alb_dif.subset({{ {band, band}, {1, n_col}}}),
794-
Array_gpu<Float,1>(), // Add an empty array, no inc_flux.
795-
(*fluxes).get_flux_up(),
796-
(*fluxes).get_flux_dn(),
797-
(*fluxes).get_flux_dn_dir());
787+
if (switch_twostream)
788+
{
789+
790+
rte_sw.rte_sw(
791+
optical_props,
792+
top_at_1,
793+
mu0,
794+
toa_src,
795+
sfc_alb_dir.subset({{ {band, band}, {1, n_col}}}),
796+
sfc_alb_dif.subset({{ {band, band}, {1, n_col}}}),
797+
Array_gpu<Float,1>(), // Add an empty array, no inc_flux.
798+
(*fluxes).get_flux_up(),
799+
(*fluxes).get_flux_dn(),
800+
(*fluxes).get_flux_dn_dir());
801+
}
798802

799803
if (switch_raytracing)
800804
{

0 commit comments

Comments
 (0)