diff --git a/data/mie_lut_broadband.nc b/data/mie_lut_broadband.nc index ab349d84..0970b47c 100644 Binary files a/data/mie_lut_broadband.nc and b/data/mie_lut_broadband.nc differ diff --git a/data/mie_lut_visualisation.nc b/data/mie_lut_visualisation.nc index 28de3bdc..5fc50ae1 100644 Binary files a/data/mie_lut_visualisation.nc and b/data/mie_lut_visualisation.nc differ diff --git a/include_rt_kernels/raytracer_kernels_bw.h b/include_rt_kernels/raytracer_kernels_bw.h index bc9d63e9..d42e8127 100644 --- a/include_rt_kernels/raytracer_kernels_bw.h +++ b/include_rt_kernels/raytracer_kernels_bw.h @@ -117,20 +117,21 @@ void ray_tracer_kernel_bw( const Float* __restrict__ mie_ang, const Float* __restrict__ mie_phase, const Float* __restrict__ mie_phase_ang, - const int mie_table_size); - + const int mie_cdf_table_size, + const int mie_phase_table_size); + __global__ void accumulate_clouds_kernel( - const Float* __restrict__ lwp, - const Float* __restrict__ iwp, + const Float* __restrict__ lwp, + const Float* __restrict__ iwp, const Float* __restrict__ tau_cloud, const Vector grid_d, const Vector grid_size, const Vector grid_cells, - Float* __restrict__ liwp_cam, - Float* __restrict__ tauc_cam, - Float* __restrict__ dist_cam, - Float* __restrict__ zen_cam, + Float* __restrict__ liwp_cam, + Float* __restrict__ tauc_cam, + Float* __restrict__ dist_cam, + Float* __restrict__ zen_cam, const Camera camera); #endif diff --git a/src_cuda_rt/Raytracer_bw.cu b/src_cuda_rt/Raytracer_bw.cu index 85eff1ba..d18599c1 100644 --- a/src_cuda_rt/Raytracer_bw.cu +++ b/src_cuda_rt/Raytracer_bw.cu @@ -99,17 +99,17 @@ namespace const int x0 = grid_x*fx; const int x1 = min(grid_cells.x-1, int(floor((grid_x+1)*fx))); - + const int y0 = grid_y*fy; const int y1 = min(grid_cells.y-1, int(floor((grid_y+1)*fy))); - + const int z0 = grid_z*fz; const int z1 = min(grid_cells.z-1, int(floor((grid_z+1)*fz))); const int ijk_grid = grid_x + grid_y*kn_grid.x + grid_z*kn_grid.y*kn_grid.x; Float k_null_min = Float(1e15); // just a ridicilously high value Float k_null_max = Float(0.); - + for (int k=z0; k<=z1; ++k) for (int j=y0; j<=y1; ++j) for (int i=x0; i<=x1; ++i) @@ -460,7 +460,7 @@ void Raytracer_bw::trace_rays( Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, camera_count.ptr()); Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, shot_count.ptr()); - + counter.fill(Int(0)); // domain sizes @@ -473,9 +473,10 @@ void Raytracer_bw::trace_rays( dim3 grid{bw_kernel_grid}, block{bw_kernel_block}; - const int mie_table_size = mie_cdf.size(); + const int mie_cdf_table_size = mie_cdf.size(); + const int mie_phase_table_size = mie_phase_ang.size(); - ray_tracer_kernel_bw<<>>( + ray_tracer_kernel_bw<<>>( igpt-1, photons_per_pixel, k_null_grid.ptr(), camera_count.ptr(), @@ -491,7 +492,9 @@ void Raytracer_bw::trace_rays( grid_size, grid_d, grid_cells, kn_grid, sun_direction, camera, nbg, mie_cdf.ptr(), mie_ang.ptr(), - mie_phase.ptr(), mie_phase_ang.ptr(), mie_table_size); + mie_phase.ptr(), mie_phase_ang.ptr(), + mie_cdf_table_size, + mie_phase_table_size); //// convert counts to fluxes const int block_cam_x = 8; @@ -621,9 +624,10 @@ void Raytracer_bw::trace_rays_bb( dim3 grid{bw_kernel_grid}, block{bw_kernel_block}; - const int mie_table_size = mie_cdf.size(); + const int mie_cdf_table_size = mie_cdf.size(); + const int mie_phase_table_size = mie_phase_ang.size(); - ray_tracer_kernel_bw<<>>( + ray_tracer_kernel_bw<<>>( igpt-1, photons_per_pixel, k_null_grid.ptr(), camera_count.ptr(), @@ -640,7 +644,8 @@ void Raytracer_bw::trace_rays_bb( sun_direction, camera, nbg, mie_cdf.ptr(), mie_ang.ptr(), mie_phase.ptr(), mie_phase_ang.ptr(), - mie_table_size); + mie_cdf_table_size, + mie_phase_table_size); //// convert counts to fluxes const int block_cam_x = 8; @@ -676,7 +681,7 @@ void Raytracer_bw::accumulate_clouds( Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, liwp_cam.ptr()); Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, tauc_cam.ptr()); Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, dist_cam.ptr()); - + const int n_pix = camera.nx * camera.ny; const int n_block = std::min(n_pix, 512); const int n_grid = std::ceil(Float(n_pix)/n_block); @@ -686,7 +691,7 @@ void Raytracer_bw::accumulate_clouds( // domain sizes const Vector grid_size = grid_d * grid_cells; - + accumulate_clouds_kernel<<>>( lwp.ptr(), iwp.ptr(), @@ -703,12 +708,12 @@ void Raytracer_bw::accumulate_clouds( } - - - - - - - - - + + + + + + + + + diff --git a/src_kernels_cuda_rt/raytracer_kernels_bw.cu b/src_kernels_cuda_rt/raytracer_kernels_bw.cu index 609b6074..dc5976b6 100644 --- a/src_kernels_cuda_rt/raytracer_kernels_bw.cu +++ b/src_kernels_cuda_rt/raytracer_kernels_bw.cu @@ -235,7 +235,7 @@ namespace atomicAdd(&camera_count[ij_cam], weight * trans_sun); } atomicAdd(&camera_shot[ij_cam], Float(1.)); - + } __device__ @@ -298,19 +298,23 @@ void ray_tracer_kernel_bw( const Float* __restrict__ mie_ang, const Float* __restrict__ mie_phase, const Float* __restrict__ mie_phase_ang, - const int mie_table_size) + const int mie_cdf_table_size, + const int mie_phase_table_size) { extern __shared__ Float shared_arrays[]; Float* mie_cdf_shared = &shared_arrays[0]; - Float* mie_phase_ang_shared = &shared_arrays[mie_table_size]; - Float* bg_tau_cum = &shared_arrays[2*mie_table_size]; + Float* mie_phase_ang_shared = &shared_arrays[mie_cdf_table_size]; + Float* bg_tau_cum = &shared_arrays[mie_phase_table_size+mie_cdf_table_size]; if (threadIdx.x==0) { - if (mie_table_size > 0) + if (mie_cdf_table_size > 0) { - for (int mie_i=0; mie_i surface_normal = {0, 0, 1}; - + const int n = blockDim.x * blockIdx.x + threadIdx.x; - + const Float bg_transmissivity = exp(-bg_tau_cum[0]); const Vector kn_grid_d = grid_size / kn_grid; @@ -338,7 +342,7 @@ void ray_tracer_kernel_bw( const Float s_min = max(max(grid_size.z, grid_size.x), grid_size.y) * Float_epsilon; const Float s_min_bg = max(max(grid_size.x, grid_size.y), z_top) * Float_epsilon; - + while (counter[0] < camera.npix*photons_per_pixel) { const Int count = atomicAdd(&counter[0], 1); @@ -677,19 +681,19 @@ void ray_tracer_kernel_bw( break; } const Float cos_scat = scatter_type == 0 ? rayleigh(rng()) : // gases -> rayleigh, - 1 ? ( (mie_table_size > 0) //clouds: Mie or HG - ? cos( mie_sample_angle(mie_cdf_shared, mie_ang, rng(), r_eff[ijk], mie_table_size) ) + 1 ? ( (mie_cdf_table_size > 0) //clouds: Mie or HG + ? cos( mie_sample_angle(mie_cdf_shared, mie_ang, rng(), r_eff[ijk], mie_cdf_table_size) ) : henyey(g, rng())) : henyey(g, rng()); //aerosols const Float sin_scat = max(Float(0.), sqrt(Float(1.) - cos_scat*cos_scat + Float_epsilon)); // SUN SCATTERING GOES HERE const Phase_kind kind = scatter_type == 0 ? Phase_kind::Rayleigh : - 1 ? (mie_table_size > 0) + 1 ? (mie_phase_table_size > 0) ? Phase_kind::Mie : Phase_kind::HG : Phase_kind::HG; - const Float p_sun = probability_from_sun(photon, sun_direction, sun_solid_angle, g, mie_phase_ang_shared, mie_phase, r_eff[ijk], mie_table_size, + const Float p_sun = probability_from_sun(photon, sun_direction, sun_solid_angle, g, mie_phase_ang_shared, mie_phase, r_eff[ijk], mie_phase_table_size, surface_normal, kind); const Float trans_sun = transmission_direct_sun(photon,n,rng,sun_direction, k_null_grid,k_ext, @@ -736,23 +740,23 @@ void ray_tracer_kernel_bw( } __global__ void accumulate_clouds_kernel( - const Float* __restrict__ lwp, - const Float* __restrict__ iwp, - const Float* __restrict__ tau_cloud, + const Float* __restrict__ lwp, + const Float* __restrict__ iwp, + const Float* __restrict__ tau_cloud, const Vector grid_d, const Vector grid_size, const Vector grid_cells, - Float* __restrict__ liwp_cam, - Float* __restrict__ tauc_cam, - Float* __restrict__ dist_cam, - Float* __restrict__ zen_cam, + Float* __restrict__ liwp_cam, + Float* __restrict__ tauc_cam, + Float* __restrict__ dist_cam, + Float* __restrict__ zen_cam, const Camera camera) { const int pix = blockDim.x * blockIdx.x + threadIdx.x; const Float s_eps = max(max(grid_size.z, grid_size.x), grid_size.y) * Float_epsilon; - Vector direction; + Vector direction; Vector position; - + if (pix < camera.nx * camera.ny) { Float liwp_sum = 0; @@ -778,13 +782,13 @@ void ray_tracer_kernel_bw( { direction = normalize(camera.cam_width * (Float(2.)*i-Float(1.0)) + camera.cam_height * (Float(2.)*j-Float(1.0)) + camera.cam_depth); } - + // first bring photon to top of dynamical domain if ((position.z >= (grid_size.z - s_eps)) && (direction.z < Float(0.))) { const Float s = abs((position.z - grid_size.z)/direction.z); position = position + direction * s - s_eps; - + // Cyclic boundary condition in x. position.x = fmod(position.x, grid_size.x); if (position.x < Float(0.)) @@ -802,12 +806,12 @@ void ray_tracer_kernel_bw( const int j = float_to_int(position.y, grid_d.y, grid_cells.y); const int k = float_to_int(position.z, grid_d.z, grid_cells.z); const int ijk = i + j*grid_cells.x + k*grid_cells.x*grid_cells.y; - + const Float sx = abs((direction.x > 0) ? ((i+1) * grid_d.x - position.x)/direction.x : (i*grid_d.x - position.x)/direction.x); const Float sy = abs((direction.y > 0) ? ((j+1) * grid_d.y - position.y)/direction.y : (j*grid_d.y - position.y)/direction.y); const Float sz = abs((direction.z > 0) ? ((k+1) * grid_d.z - position.z)/direction.z : (k*grid_d.z - position.z)/direction.z); const Float s_min = min(sx, min(sy, sz)); - + liwp_sum += s_min * (lwp[ijk] + iwp[ijk]); tauc_sum += s_min * tau_cloud[ijk]; if (!reached_cloud) @@ -815,13 +819,13 @@ void ray_tracer_kernel_bw( dist += s_min; reached_cloud = tau_cloud[ijk] > 0; } - + position = position + direction * s_min; position.x += direction.x >= 0 ? s_eps : -s_eps; position.y += direction.y >= 0 ? s_eps : -s_eps; position.z += direction.z >= 0 ? s_eps : -s_eps; - + // Cyclic boundary condition in x. position.x = fmod(position.x, grid_size.x); if (position.x < Float(0.)) @@ -833,7 +837,7 @@ void ray_tracer_kernel_bw( position.y += grid_size.y; } - + // divide out initial layer thicknes, equivalent to first converting lwp (g/m2) to lwc (g/m3) or optical depth to k_ext(1/m) liwp_cam[pix] = liwp_sum / grid_d.z; tauc_cam[pix] = tauc_sum / grid_d.z; diff --git a/src_test/Radiation_solver_bw.cu b/src_test/Radiation_solver_bw.cu index cf18aeb4..d037abd2 100644 --- a/src_test/Radiation_solver_bw.cu +++ b/src_test/Radiation_solver_bw.cu @@ -747,9 +747,10 @@ void Radiation_solver_shortwave::load_mie_tables( const int n_bnd_sw = this->get_n_bnd_gpu(); const int n_re = mie_nc.get_dimension_size("r_eff"); const int n_mie = mie_nc.get_dimension_size("n_ang"); + const int n_mie_cdf = mie_nc.get_dimension_size("n_ang_cdf"); - Array mie_cdf(mie_nc.get_variable("phase_cdf", {n_bnd_sw, n_mie}), {n_mie, 1, n_bnd_sw}); - Array mie_ang(mie_nc.get_variable("phase_cdf_angle", {n_bnd_sw, n_re, n_mie}), {n_mie, n_re, 1, n_bnd_sw}); + Array mie_cdf(mie_nc.get_variable("phase_cdf", {n_bnd_sw, n_mie_cdf}), {n_mie_cdf, 1, n_bnd_sw}); + Array mie_ang(mie_nc.get_variable("phase_cdf_angle", {n_bnd_sw, n_re, n_mie_cdf}), {n_mie_cdf, n_re, 1, n_bnd_sw}); Array mie_phase(mie_nc.get_variable("phase", {n_bnd_sw, n_re, n_mie}), {n_mie, n_re, 1, n_bnd_sw}); Array mie_phase_ang(mie_nc.get_variable("phase_angle", {n_mie}), {n_mie}); @@ -767,10 +768,11 @@ void Radiation_solver_shortwave::load_mie_tables( const int n_bnd_sw = this->get_n_bnd_gpu(); const int n_re = mie_nc.get_dimension_size("r_eff"); const int n_mie = mie_nc.get_dimension_size("n_ang"); + const int n_mie_cdf = mie_nc.get_dimension_size("n_ang_cdf"); const int n_sub = mie_nc.get_dimension_size("sub_band"); - Array mie_cdf(mie_nc.get_variable("phase_cdf", {n_bnd_sw, n_sub, n_mie}), {n_mie, n_sub, n_bnd_sw}); - Array mie_ang(mie_nc.get_variable("phase_cdf_angle", {n_bnd_sw, n_sub, n_re, n_mie}), {n_mie, n_re, n_sub, n_bnd_sw}); + Array mie_cdf(mie_nc.get_variable("phase_cdf", {n_bnd_sw, n_sub, n_mie_cdf}), {n_mie_cdf, n_sub, n_bnd_sw}); + Array mie_ang(mie_nc.get_variable("phase_cdf_angle", {n_bnd_sw, n_sub, n_re, n_mie_cdf}), {n_mie_cdf, n_re, n_sub, n_bnd_sw}); Array mie_phase(mie_nc.get_variable("phase", {n_bnd_sw, n_sub, n_re, n_mie}), {n_mie, n_re, n_sub, n_bnd_sw}); Array mie_phase_ang(mie_nc.get_variable("phase_angle", {n_mie}), {n_mie}); @@ -827,7 +829,8 @@ void Radiation_solver_shortwave::solve_gpu( const int n_gpt = this->kdist_gpu->get_ngpt(); const int n_bnd = this->kdist_gpu->get_nband(); - const int n_mie = (switch_cloud_mie) ? this->mie_angs.dim(1) : 0; + const int n_mie = (switch_cloud_mie) ? this->mie_phase_angs.dim(1) : 0; + const int n_mie_cdf = (switch_cloud_mie) ? this->mie_angs.dim(1) : 0; const int n_re = (switch_cloud_mie) ? this->mie_angs.dim(2) : 0; const int n_sub = (switch_cloud_mie) ? this->mie_angs.dim(3) : 3; @@ -1028,8 +1031,8 @@ void Radiation_solver_shortwave::solve_gpu( if (switch_cloud_mie) { - mie_cdfs_sub = mie_cdfs.subset({{ {1, n_mie}, {iwv+1,iwv+1}, {band, band} }}); - mie_angs_sub = mie_angs.subset({{ {1, n_mie}, {1, n_re}, {iwv+1,iwv+1}, {band, band} }}); + mie_cdfs_sub = mie_cdfs.subset({{ {1, n_mie_cdf}, {iwv+1,iwv+1}, {band, band} }}); + mie_angs_sub = mie_angs.subset({{ {1, n_mie_cdf}, {1, n_re}, {iwv+1,iwv+1}, {band, band} }}); mie_phase_sub = mie_phase.subset({{ {1, n_mie}, {1, n_re}, {iwv+1,iwv+1}, {band, band} }}); } @@ -1137,7 +1140,8 @@ void Radiation_solver_shortwave::solve_gpu_bb( const int n_gpt = this->kdist_gpu->get_ngpt(); const int n_bnd = this->kdist_gpu->get_nband(); - const int n_mie = (switch_cloud_mie) ? this->mie_angs_bb.dim(1) : 0; + const int n_mie = (switch_cloud_mie) ? this->mie_phase_angs_bb.dim(1) : 0; + const int n_mie_cdf = (switch_cloud_mie) ? this->mie_angs_bb.dim(1) : 0; const int n_re = (switch_cloud_mie) ? this->mie_angs_bb.dim(2) : 0; const int cam_nx = radiance.dim(1); @@ -1296,8 +1300,8 @@ void Radiation_solver_shortwave::solve_gpu_bb( if (switch_cloud_mie) { - mie_cdfs_sub = mie_cdfs_bb.subset({{ {1, n_mie}, {1,1}, {band, band} }}); - mie_angs_sub = mie_angs_bb.subset({{ {1, n_mie}, {1, n_re}, {1,1}, {band, band} }}); + mie_cdfs_sub = mie_cdfs_bb.subset({{ {1, n_mie_cdf}, {1,1}, {band, band} }}); + mie_angs_sub = mie_angs_bb.subset({{ {1, n_mie_cdf}, {1, n_re}, {1,1}, {band, band} }}); mie_phase_sub = mie_phase_bb.subset({{ {1, n_mie}, {1, n_re}, {1,1}, {band, band} }}); } diff --git a/src_test/Radiation_solver_rt.cu b/src_test/Radiation_solver_rt.cu index f12a3dea..672b8d1a 100644 --- a/src_test/Radiation_solver_rt.cu +++ b/src_test/Radiation_solver_rt.cu @@ -554,7 +554,7 @@ void Radiation_solver_shortwave::load_mie_tables( { Netcdf_file mie_nc(file_name_mie, Netcdf_mode::Read); const int n_re = mie_nc.get_dimension_size("r_eff"); - const int n_mie = mie_nc.get_dimension_size("n_ang"); + const int n_mie = mie_nc.get_dimension_size("n_ang_cdf"); const int n_bnd_sw = this->get_n_bnd_gpu(); Array mie_cdf(mie_nc.get_variable("phase_cdf", {n_bnd_sw, n_mie}), {n_mie, n_bnd_sw}); @@ -784,17 +784,21 @@ void Radiation_solver_shortwave::solve_gpu( std::unique_ptr fluxes = std::make_unique(grid_cells.x, grid_cells.y, grid_cells.z, n_lev); - rte_sw.rte_sw( - optical_props, - top_at_1, - mu0, - toa_src, - sfc_alb_dir.subset({{ {band, band}, {1, n_col}}}), - sfc_alb_dif.subset({{ {band, band}, {1, n_col}}}), - Array_gpu(), // Add an empty array, no inc_flux. - (*fluxes).get_flux_up(), - (*fluxes).get_flux_dn(), - (*fluxes).get_flux_dn_dir()); + if (switch_twostream) + { + + rte_sw.rte_sw( + optical_props, + top_at_1, + mu0, + toa_src, + sfc_alb_dir.subset({{ {band, band}, {1, n_col}}}), + sfc_alb_dif.subset({{ {band, band}, {1, n_col}}}), + Array_gpu(), // Add an empty array, no inc_flux. + (*fluxes).get_flux_up(), + (*fluxes).get_flux_dn(), + (*fluxes).get_flux_dn_dir()); + } if (switch_raytracing) {