microhh
diff --git a/‎data/mie_lut_broadband_ncdf-1439.nc‎
7.1 MB b/‎data/mie_lut_broadband_ncdf-1439.nc‎
7.1 MB
diff --git a/‎data/mie_lut_broadband_ncdf-1800.nc‎
7.91 MB b/‎data/mie_lut_broadband_ncdf-1800.nc‎
7.91 MB
diff --git a/‎data/mie_lut_broadband_ncdf-876.nc‎
5.84 MB b/‎data/mie_lut_broadband_ncdf-876.nc‎
5.84 MB
diff --git a/‎include_rt_kernels/raytracer_kernels_bw.h‎
Lines changed: 9 additions & 8 deletions b/‎include_rt_kernels/raytracer_kernels_bw.h‎
Lines changed: 9 additions & 8 deletions
diff --git a/‎src_cuda_rt/Raytracer_bw.cu‎
Lines changed: 26 additions & 21 deletions b/‎src_cuda_rt/Raytracer_bw.cu‎
Lines changed: 26 additions & 21 deletions
diff --git a/‎src_kernels_cuda_rt/raytracer_kernels_bw.cu‎
Lines changed: 34 additions & 30 deletions b/‎src_kernels_cuda_rt/raytracer_kernels_bw.cu‎
Lines changed: 34 additions & 30 deletions
diff --git a/‎src_test/Radiation_solver_bw.cu‎
Lines changed: 7 additions & 5 deletions b/‎src_test/Radiation_solver_bw.cu‎
Lines changed: 7 additions & 5 deletions
diff --git a/‎src_test/Radiation_solver_rt.cu‎
Lines changed: 16 additions & 12 deletions b/‎src_test/Radiation_solver_rt.cu‎
Lines changed: 16 additions & 12 deletions
@@ -117,20 +117,21 @@ void ray_tracer_kernel_bw(
         const Float* __restrict__ mie_ang,
         const Float* __restrict__ mie_phase,
         const Float* __restrict__ mie_phase_ang,
-        const int mie_table_size);
-            
+        const int mie_cdf_table_size,
+        const int mie_phase_table_size);
+
 __global__
 void accumulate_clouds_kernel(
-    const Float* __restrict__ lwp, 
-    const Float* __restrict__ iwp, 
+    const Float* __restrict__ lwp,
+    const Float* __restrict__ iwp,
     const Float* __restrict__ tau_cloud,
     const Vector<Float> grid_d,
     const Vector<Float> grid_size,
     const Vector<int> grid_cells,
-    Float* __restrict__ liwp_cam, 
-    Float* __restrict__ tauc_cam, 
-    Float* __restrict__ dist_cam, 
-    Float* __restrict__ zen_cam, 
+    Float* __restrict__ liwp_cam,
+    Float* __restrict__ tauc_cam,
+    Float* __restrict__ dist_cam,
+    Float* __restrict__ zen_cam,
     const Camera camera);
 
 #endif
@@ -99,17 +99,17 @@ namespace
 
             const int x0 = grid_x*fx;
             const int x1 = min(grid_cells.x-1, int(floor((grid_x+1)*fx)));
-            
+
             const int y0 = grid_y*fy;
             const int y1 = min(grid_cells.y-1, int(floor((grid_y+1)*fy)));
-            
+
             const int z0 = grid_z*fz;
             const int z1 = min(grid_cells.z-1, int(floor((grid_z+1)*fz)));
 
             const int ijk_grid = grid_x + grid_y*kn_grid.x + grid_z*kn_grid.y*kn_grid.x;
             Float k_null_min = Float(1e15); // just a ridicilously high value
             Float k_null_max = Float(0.);
-           
+
             for (int k=z0; k<=z1; ++k)
                 for (int j=y0; j<=y1; ++j)
                     for (int i=x0; i<=x1; ++i)
@@ -460,7 +460,7 @@ void Raytracer_bw::trace_rays(
 
     Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, camera_count.ptr());
     Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, shot_count.ptr());
-    
+
     counter.fill(Int(0));
 
     // domain sizes
@@ -473,9 +473,10 @@ void Raytracer_bw::trace_rays(
 
     dim3 grid{bw_kernel_grid}, block{bw_kernel_block};
 
-    const int mie_table_size = mie_cdf.size();
+    const int mie_cdf_table_size = mie_cdf.size();
+    const int mie_phase_table_size = mie_phase_ang.size();
 
-    ray_tracer_kernel_bw<<<grid, block, nbg*sizeof(Float) + 2 * sizeof(Float)*mie_table_size>>>(
+    ray_tracer_kernel_bw<<<grid, block, nbg*sizeof(Float)+ sizeof(Float)*(mie_cdf_table_size+mie_phase_table_size)>>>(
             igpt-1,
             photons_per_pixel, k_null_grid.ptr(),
             camera_count.ptr(),
@@ -491,7 +492,9 @@ void Raytracer_bw::trace_rays(
             grid_size, grid_d, grid_cells, kn_grid,
             sun_direction, camera, nbg,
             mie_cdf.ptr(), mie_ang.ptr(),
-            mie_phase.ptr(), mie_phase_ang.ptr(), mie_table_size);
+            mie_phase.ptr(), mie_phase_ang.ptr(),
+            mie_cdf_table_size,
+            mie_phase_table_size);
 
     //// convert counts to fluxes
     const int block_cam_x = 8;
@@ -621,9 +624,10 @@ void Raytracer_bw::trace_rays_bb(
 
     dim3 grid{bw_kernel_grid}, block{bw_kernel_block};
 
-    const int mie_table_size = mie_cdf.size();
+    const int mie_cdf_table_size = mie_cdf.size();
+    const int mie_phase_table_size = mie_phase_ang.size();
 
-    ray_tracer_kernel_bw<<<grid, block, nbg*sizeof(Float)+ 2 * sizeof(Float)*mie_table_size>>>(
+    ray_tracer_kernel_bw<<<grid, block, nbg*sizeof(Float)+ sizeof(Float)*(mie_cdf_table_size+mie_phase_table_size)>>>(
             igpt-1,
             photons_per_pixel, k_null_grid.ptr(),
             camera_count.ptr(),
@@ -640,7 +644,8 @@ void Raytracer_bw::trace_rays_bb(
             sun_direction, camera, nbg,
             mie_cdf.ptr(), mie_ang.ptr(),
             mie_phase.ptr(), mie_phase_ang.ptr(),
-            mie_table_size);
+            mie_cdf_table_size,
+            mie_phase_table_size);
 
     //// convert counts to fluxes
     const int block_cam_x = 8;
@@ -676,7 +681,7 @@ void Raytracer_bw::accumulate_clouds(
     Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, liwp_cam.ptr());
     Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, tauc_cam.ptr());
     Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, dist_cam.ptr());
-    
+
     const int n_pix = camera.nx * camera.ny;
     const int n_block = std::min(n_pix, 512);
     const int n_grid = std::ceil(Float(n_pix)/n_block);
@@ -686,7 +691,7 @@ void Raytracer_bw::accumulate_clouds(
 
     // domain sizes
     const Vector<Float> grid_size = grid_d * grid_cells;
-    
+
     accumulate_clouds_kernel<<<grid, block>>>(
         lwp.ptr(),
         iwp.ptr(),
@@ -703,12 +708,12 @@ void Raytracer_bw::accumulate_clouds(
 }
 
 
-            
-            
-            
-            
-            
-            
-            
-            
-            
+
+
+
+
+
+
+
+
+
@@ -235,7 +235,7 @@ namespace
            atomicAdd(&camera_count[ij_cam], weight * trans_sun);
         }
         atomicAdd(&camera_shot[ij_cam], Float(1.));
-        
+
     }
 
     __device__
@@ -298,19 +298,23 @@ void ray_tracer_kernel_bw(
         const Float* __restrict__ mie_ang,
         const Float* __restrict__ mie_phase,
         const Float* __restrict__ mie_phase_ang,
-        const int mie_table_size)
+        const int mie_cdf_table_size,
+        const int mie_phase_table_size)
 {
     extern __shared__ Float shared_arrays[];
     Float* mie_cdf_shared = &shared_arrays[0];
-    Float* mie_phase_ang_shared = &shared_arrays[mie_table_size];
-    Float* bg_tau_cum = &shared_arrays[2*mie_table_size];
+    Float* mie_phase_ang_shared = &shared_arrays[mie_phase_table_size];
+    Float* bg_tau_cum = &shared_arrays[mie_phase_table_size+mie_cdf_table_size];
     if (threadIdx.x==0)
     {
-        if (mie_table_size > 0)
+        if (mie_cdf_table_size > 0)
         {
-            for (int mie_i=0; mie_i<mie_table_size; ++mie_i)
+            for (int mie_i=0; mie_i<mie_cdf_table_size; ++mie_i)
             {
                 mie_cdf_shared[mie_i] = mie_cdf[mie_i];
+            }
+            for (int mie_i=0; mie_i<mie_phase_table_size; ++mie_i)
+            {
                 mie_phase_ang_shared[mie_i] = mie_phase_ang[mie_i];
             }
         }
@@ -324,11 +328,11 @@ void ray_tracer_kernel_bw(
     }
 
     __syncthreads();
-    
+
     Vector<Float> surface_normal = {0, 0, 1};
-    
+
     const int n = blockDim.x * blockIdx.x + threadIdx.x;
-    
+
     const Float bg_transmissivity = exp(-bg_tau_cum[0]);
 
     const Vector<Float> kn_grid_d = grid_size / kn_grid;
@@ -338,7 +342,7 @@ void ray_tracer_kernel_bw(
 
     const Float s_min = max(max(grid_size.z, grid_size.x), grid_size.y) * Float_epsilon;
     const Float s_min_bg = max(max(grid_size.x, grid_size.y), z_top) * Float_epsilon;
-    
+
     while (counter[0] < camera.npix*photons_per_pixel)
     {
         const Int count = atomicAdd(&counter[0], 1);
@@ -677,19 +681,19 @@ void ray_tracer_kernel_bw(
                                     break;
                             }
                             const Float cos_scat = scatter_type == 0 ? rayleigh(rng()) : // gases -> rayleigh,
-                                                                   1 ? ( (mie_table_size > 0) //clouds: Mie or HG
-                                                                            ? cos( mie_sample_angle(mie_cdf_shared, mie_ang, rng(), r_eff[ijk], mie_table_size) )
+                                                                   1 ? ( (mie_cdf_table_size > 0) //clouds: Mie or HG
+                                                                            ? cos( mie_sample_angle(mie_cdf_shared, mie_ang, rng(), r_eff[ijk], mie_cdf_table_size) )
                                                                             :  henyey(g, rng()))
                                                                    : henyey(g, rng()); //aerosols
                             const Float sin_scat = max(Float(0.), sqrt(Float(1.) - cos_scat*cos_scat + Float_epsilon));
 
                             // SUN SCATTERING GOES HERE
                             const Phase_kind kind = scatter_type == 0 ? Phase_kind::Rayleigh :
-                                                                    1 ? (mie_table_size > 0)
+                                                                    1 ? (mie_phase_table_size > 0)
                                                                         ? Phase_kind::Mie
                                                                         : Phase_kind::HG
                                                                 : Phase_kind::HG;
-                            const Float p_sun = probability_from_sun(photon, sun_direction, sun_solid_angle, g, mie_phase_ang_shared, mie_phase, r_eff[ijk], mie_table_size,
+                            const Float p_sun = probability_from_sun(photon, sun_direction, sun_solid_angle, g, mie_phase_ang_shared, mie_phase, r_eff[ijk], mie_phase_table_size,
                                                                      surface_normal, kind);
                             const Float trans_sun = transmission_direct_sun(photon,n,rng,sun_direction,
                                                         k_null_grid,k_ext,
@@ -736,23 +740,23 @@ void ray_tracer_kernel_bw(
 }
     __global__
     void accumulate_clouds_kernel(
-            const Float* __restrict__ lwp, 
-            const Float* __restrict__ iwp, 
-            const Float* __restrict__ tau_cloud, 
+            const Float* __restrict__ lwp,
+            const Float* __restrict__ iwp,
+            const Float* __restrict__ tau_cloud,
             const Vector<Float> grid_d,
             const Vector<Float> grid_size,
             const Vector<int> grid_cells,
-            Float* __restrict__ liwp_cam, 
-            Float* __restrict__ tauc_cam, 
-            Float* __restrict__ dist_cam, 
-            Float* __restrict__ zen_cam, 
+            Float* __restrict__ liwp_cam,
+            Float* __restrict__ tauc_cam,
+            Float* __restrict__ dist_cam,
+            Float* __restrict__ zen_cam,
             const Camera camera)
     {
         const int pix = blockDim.x * blockIdx.x + threadIdx.x;
         const Float s_eps = max(max(grid_size.z, grid_size.x), grid_size.y) * Float_epsilon;
-        Vector<Float> direction; 
+        Vector<Float> direction;
         Vector<Float> position;
-    
+
         if (pix < camera.nx * camera.ny)
         {
             Float liwp_sum = 0;
@@ -778,13 +782,13 @@ void ray_tracer_kernel_bw(
             {
                 direction = normalize(camera.cam_width * (Float(2.)*i-Float(1.0)) + camera.cam_height * (Float(2.)*j-Float(1.0)) + camera.cam_depth);
             }
-            
+
             // first bring photon to top of dynamical domain
             if ((position.z >= (grid_size.z - s_eps)) && (direction.z < Float(0.)))
             {
                 const Float s = abs((position.z - grid_size.z)/direction.z);
                 position = position + direction * s - s_eps;
-                
+
                 // Cyclic boundary condition in x.
                 position.x = fmod(position.x, grid_size.x);
                 if (position.x < Float(0.))
@@ -802,26 +806,26 @@ void ray_tracer_kernel_bw(
                 const int j = float_to_int(position.y, grid_d.y, grid_cells.y);
                 const int k = float_to_int(position.z, grid_d.z, grid_cells.z);
                 const int ijk = i + j*grid_cells.x + k*grid_cells.x*grid_cells.y;
-                
+
                 const Float sx = abs((direction.x > 0) ? ((i+1) * grid_d.x - position.x)/direction.x : (i*grid_d.x - position.x)/direction.x);
                 const Float sy = abs((direction.y > 0) ? ((j+1) * grid_d.y - position.y)/direction.y : (j*grid_d.y - position.y)/direction.y);
                 const Float sz = abs((direction.z > 0) ? ((k+1) * grid_d.z - position.z)/direction.z : (k*grid_d.z - position.z)/direction.z);
                 const Float s_min = min(sx, min(sy, sz));
-                
+
                 liwp_sum += s_min * (lwp[ijk] + iwp[ijk]);
                 tauc_sum += s_min * tau_cloud[ijk];
                 if (!reached_cloud)
                 {
                     dist += s_min;
                     reached_cloud = tau_cloud[ijk] > 0;
                 }
-                
+
                 position = position + direction * s_min;
 
                 position.x += direction.x >= 0 ? s_eps : -s_eps;
                 position.y += direction.y >= 0 ? s_eps : -s_eps;
                 position.z += direction.z >= 0 ? s_eps : -s_eps;
-                        
+
                 // Cyclic boundary condition in x.
                 position.x = fmod(position.x, grid_size.x);
                 if (position.x < Float(0.))
@@ -833,7 +837,7 @@ void ray_tracer_kernel_bw(
                     position.y += grid_size.y;
 
             }
-            
+
             // divide out initial layer thicknes, equivalent to first converting lwp (g/m2) to lwc (g/m3) or optical depth to k_ext(1/m)
             liwp_cam[pix] = liwp_sum / grid_d.z;
             tauc_cam[pix] = tauc_sum / grid_d.z;
 
@@ -747,9 +747,10 @@ void Radiation_solver_shortwave::load_mie_tables(
         const int n_bnd_sw = this->get_n_bnd_gpu();
         const int n_re  = mie_nc.get_dimension_size("r_eff");
         const int n_mie = mie_nc.get_dimension_size("n_ang");
+        const int n_mie_cdf = mie_nc.get_dimension_size("n_ang_cdf");
 
-        Array<Float,3> mie_cdf(mie_nc.get_variable<Float>("phase_cdf", {n_bnd_sw, n_mie}), {n_mie, 1, n_bnd_sw});
-        Array<Float,4> mie_ang(mie_nc.get_variable<Float>("phase_cdf_angle", {n_bnd_sw, n_re, n_mie}), {n_mie, n_re, 1, n_bnd_sw});
+        Array<Float,3> mie_cdf(mie_nc.get_variable<Float>("phase_cdf", {n_bnd_sw, n_mie_cdf}), {n_mie, 1, n_bnd_sw});
+        Array<Float,4> mie_ang(mie_nc.get_variable<Float>("phase_cdf_angle", {n_bnd_sw, n_re, n_mie_cdf}), {n_mie_cdf, n_re, 1, n_bnd_sw});
 
         Array<Float,4> mie_phase(mie_nc.get_variable<Float>("phase", {n_bnd_sw, n_re, n_mie}), {n_mie, n_re, 1, n_bnd_sw});
         Array<Float,1> mie_phase_ang(mie_nc.get_variable<Float>("phase_angle", {n_mie}), {n_mie});
@@ -1137,7 +1138,8 @@ void Radiation_solver_shortwave::solve_gpu_bb(
     const int n_gpt = this->kdist_gpu->get_ngpt();
     const int n_bnd = this->kdist_gpu->get_nband();
 
-    const int n_mie = (switch_cloud_mie) ? this->mie_angs_bb.dim(1) : 0;
+    const int n_mie = (switch_cloud_mie) ? this->mie_phase_angs_bb.dim(1) : 0;
+    const int n_mie_cdf = (switch_cloud_mie) ? this->mie_angs_bb.dim(1) : 0;
     const int n_re = (switch_cloud_mie) ? this->mie_angs_bb.dim(2) : 0;
 
     const int cam_nx = radiance.dim(1);
@@ -1296,8 +1298,8 @@ void Radiation_solver_shortwave::solve_gpu_bb(
 
             if (switch_cloud_mie)
             {
-                mie_cdfs_sub = mie_cdfs_bb.subset({{ {1, n_mie}, {1,1}, {band, band} }});
-                mie_angs_sub = mie_angs_bb.subset({{ {1, n_mie}, {1, n_re}, {1,1}, {band, band} }});
+                mie_cdfs_sub = mie_cdfs_bb.subset({{ {1, n_mie_cdf}, {1,1}, {band, band} }});
+                mie_angs_sub = mie_angs_bb.subset({{ {1, n_mie_cdf}, {1, n_re}, {1,1}, {band, band} }});
                 mie_phase_sub = mie_phase_bb.subset({{ {1, n_mie}, {1, n_re}, {1,1}, {band, band} }});
             }
 
 
@@ -554,7 +554,7 @@ void Radiation_solver_shortwave::load_mie_tables(
 {
     Netcdf_file mie_nc(file_name_mie, Netcdf_mode::Read);
     const int n_re  = mie_nc.get_dimension_size("r_eff");
-    const int n_mie = mie_nc.get_dimension_size("n_ang");
+    const int n_mie = mie_nc.get_dimension_size("n_ang_cdf");
 
     const int n_bnd_sw = this->get_n_bnd_gpu();
     Array<Float,2> mie_cdf(mie_nc.get_variable<Float>("phase_cdf", {n_bnd_sw, n_mie}), {n_mie, n_bnd_sw});
@@ -784,17 +784,21 @@ void Radiation_solver_shortwave::solve_gpu(
             std::unique_ptr<Fluxes_broadband_rt> fluxes =
                     std::make_unique<Fluxes_broadband_rt>(grid_cells.x, grid_cells.y, grid_cells.z, n_lev);
 
-            rte_sw.rte_sw(
-                    optical_props,
-                    top_at_1,
-                    mu0,
-                    toa_src,
-                    sfc_alb_dir.subset({{ {band, band}, {1, n_col}}}),
-                    sfc_alb_dif.subset({{ {band, band}, {1, n_col}}}),
-                    Array_gpu<Float,1>(), // Add an empty array, no inc_flux.
-                    (*fluxes).get_flux_up(),
-                    (*fluxes).get_flux_dn(),
-                    (*fluxes).get_flux_dn_dir());
+            if (switch_twostream)
+            {
+
+                rte_sw.rte_sw(
+                        optical_props,
+                        top_at_1,
+                        mu0,
+                        toa_src,
+                        sfc_alb_dir.subset({{ {band, band}, {1, n_col}}}),
+                        sfc_alb_dif.subset({{ {band, band}, {1, n_col}}}),
+                        Array_gpu<Float,1>(), // Add an empty array, no inc_flux.
+                        (*fluxes).get_flux_up(),
+                        (*fluxes).get_flux_dn(),
+                        (*fluxes).get_flux_dn_dir());
+            }
 
             if (switch_raytracing)
             {