Skip to content

Commit e7349f7

Browse files
authored
Merge pull request #43 from MennoVeerman/main
fix bug in null collision grid creation and some addition to rt_lite
2 parents c6859e6 + dc2c741 commit e7349f7

File tree

6 files changed

+102
-25
lines changed

6 files changed

+102
-25
lines changed

include_rt_kernels/rte_solver_kernels_cuda_rt.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ namespace Rte_solver_kernels_cuda_rt
3838
void apply_BC(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, Float* gpt_flux_dn);
3939

4040
void apply_BC(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, const Float* inc_flux_dif, Float* gpt_flux_dn);
41+
42+
void apply_BC(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, const Float inc_flux, Float* gpt_flux_dn);
4143

4244
void sw_solver_2stream(
4345
const int ncol, const int nlay, const int ngpt, const Bool top_at_1,

python/set_virtual_camera.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,13 @@
9191
cam[var][:] = args[var]
9292

9393
# When a solar zenith angle is supplied (in degrees), store it in radians as
# 'sza' and store its cosine as 'mu0'.
if not args['sza'] is None:
    # Create 'sza' only when it is missing, with the same dimensions as 'mu0'.
    # An explicit existence check replaces the former bare `except: pass`, so
    # genuine failures (e.g. a missing 'mu0') are reported where they occur.
    # NOTE(review): assumes ncf exposes a netCDF4-style `variables` mapping — confirm.
    if 'sza' not in ncf.variables:
        ncf.createVariable('sza','f4',ncf['mu0'].dimensions)
    ncf['sza'][:] = np.deg2rad(args['sza'])
    ncf['mu0'][:] = np.cos(np.deg2rad(args['sza']))
100+
95101
if not args['azi'] is None:
96102
ncf['azi'][:] = np.deg2rad(args['azi'])
97103

src_cuda_rt/Raytracer.cu

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,18 +53,18 @@ namespace
5353
const Float fz = Float(grid_cells.z) / Float(kn_grid.z);
5454

5555
const int x0 = grid_x*fx;
56-
const int x1 = floor((grid_x+1)*fx);
56+
const int x1 = min(grid_cells.x-1, int(floor((grid_x+1)*fx)));
5757
const int y0 = grid_y*fy;
58-
const int y1 = floor((grid_y+1)*fy);
58+
const int y1 = min(grid_cells.y-1, int(floor((grid_y+1)*fy)));
5959
const int z0 = grid_z*fz;
60-
const int z1 = floor((grid_z+1)*fz);
60+
const int z1 = min(grid_cells.z-1, int(floor((grid_z+1)*fz)));
6161

6262
const int ijk_grid = grid_x + grid_y*kn_grid.x + grid_z*kn_grid.y*kn_grid.x;
6363
Float k_null = k_ext_null_min;
6464

65-
for (int k=z0; k<z1; ++k)
66-
for (int j=y0; j<y1; ++j)
67-
for (int i=x0; i<x1; ++i)
65+
for (int k=z0; k<=z1; ++k)
66+
for (int j=y0; j<=y1; ++j)
67+
for (int i=x0; i<=x1; ++i)
6868
{
6969
const int ijk_in = i + j*grid_cells.x + k*grid_cells.x*grid_cells.y;
7070
k_null = max(k_null, k_ext[ijk_in]);

src_kernels_cuda_rt/rte_solver_kernels_launchers_rt.cu

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,16 @@ namespace Rte_solver_kernels_cuda_rt
4141
}
4242

4343

44+
// Host-side launcher for the scalar-flux overload of apply_BC_kernel.
//
// Launches a 1D grid of 32-thread blocks, with enough blocks to cover ncol
// threads, and forwards all arguments unchanged to the kernel.
//   ncol, nlay, ngpt : problem sizes forwarded to the kernel.
//   top_at_1         : orientation flag forwarded to the kernel.
//   inc_flux         : scalar flux value forwarded to the kernel.
//   gpt_flux_dn      : output pointer forwarded to the kernel
//                      (presumably device memory — confirm against callers).
void apply_BC(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, const Float inc_flux, Float* gpt_flux_dn)
{
    // Nothing to write for an empty problem; a grid of zero blocks would be
    // an invalid launch configuration.
    if (ncol <= 0)
        return;

    const int block_col = 32;
    // Ceiling division: one extra block when ncol is not a multiple of block_col.
    const int grid_col = ncol/block_col + (ncol%block_col > 0);

    dim3 grid_gpu(grid_col);
    dim3 block_gpu(block_col);
    apply_BC_kernel<<<grid_gpu, block_gpu>>>(ncol, nlay, ngpt, top_at_1, inc_flux, gpt_flux_dn);
}
53+
4454
void apply_BC(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, const Float* inc_flux_dif, Float* gpt_flux_dn)
4555
{
4656
const int block_col = 32;

src_kernels_cuda_rt/rte_solver_kernels_rt.cu

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,18 @@ void apply_BC_kernel_lw(const int isfc, int ncol, const int nlay, const int ngpt
305305
}
306306
}
307307

308+
// Write a single scalar flux value into one level of flux_dn for every column.
//
// Each thread handles at most one column (icol is the global 1D thread index);
// threads with icol >= ncol do nothing.
//   ncol     : number of columns; also the stride used to locate the level.
//   nlay     : number of layers; the written offset is nlay*ncol when top_at_1 is false.
//   ngpt     : not used by this overload.
//   top_at_1 : when true the value is written at offset 0, otherwise at offset nlay*ncol.
//   inc_flux : scalar value stored for every column.
//   flux_dn  : output array; only element icol + offset is written per thread.
// NOTE(review): the indexing presumes flux_dn is stored column-fastest with
// nlay+1 levels — confirm against the caller.
__global__
void apply_BC_kernel(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, const Float inc_flux, Float* __restrict__ flux_dn)
{
    const int icol = blockIdx.x*blockDim.x + threadIdx.x;
    if (icol < ncol)
    {
        const int idx_out = icol + (top_at_1 ? 0 : nlay*ncol);
        flux_dn[idx_out] = inc_flux;
    }
}
319+
308320
__global__
309321
void apply_BC_kernel(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, const Float* __restrict__ inc_flux, Float* __restrict__ flux_dn)
310322
{

src_test/test_rt_lite.cu

Lines changed: 66 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
#include "Raytracer_bw.h"
3131
#include "raytracer_kernels_bw.h"
3232
#include "types.h"
33+
#include "rte_solver_kernels_cuda_rt.h"
34+
#include "Rte_sw_rt.h"
3335
#include "tools_gpu.h"
3436

3537

@@ -129,20 +131,22 @@ void solve_radiation(int argc, char** argv)
129131
// Parse the command line options.
130132
std::map<std::string, std::pair<bool, std::string>> command_line_switches {
131133
{"raytracing" , { true, "Use forward raytracer for irradiances. '--raytracing 256': use 256 rays per pixel" }},
132-
{"bw_raytracing" , { true, "Use backward raytracer radiances. '--raytracing 256': use 256 rays per pixel" }},
134+
{"bw-raytracing" , { true, "Use backward raytracer radiances. '--raytracing 256': use 256 rays per pixel" }},
135+
{"two-stream" , { true, "Perform two-stream computations"}},
133136
{"cloud-mie" , { false, "Use Mie tables for cloud scattering in ray tracer" }},
134137
{"independent-column", { false, "run raytracer in independent column mode"}},
135138
{"profiling" , { false, "Perform additional profiling run." }} };
136139

137140
std::map<std::string, std::pair<int, std::string>> command_line_ints {
138141
{"raytracing", {32, "Number of rays initialised at TOD per pixel."}},
139-
{"bw_raytracing", {32, "Number of rays initialised at per camera pixel."}}} ;
142+
{"bw-raytracing", {32, "Number of rays initialised at per camera pixel."}}} ;
140143

141144
if (parse_command_line_options(command_line_switches, command_line_ints, argc, argv))
142145
return;
143146

144147
const bool switch_raytracing = command_line_switches.at("raytracing" ).first;
145-
const bool switch_bw_raytracing = command_line_switches.at("bw_raytracing" ).first;
148+
const bool switch_bw_raytracing = command_line_switches.at("bw-raytracing" ).first;
149+
const bool switch_two_stream = command_line_switches.at("two-stream" ).first;
146150
const bool switch_cloud_mie = command_line_switches.at("cloud-mie" ).first;
147151
const bool switch_independent_column = command_line_switches.at("independent-column").first;
148152
const bool switch_profiling = command_line_switches.at("profiling" ).first;
@@ -207,6 +211,7 @@ void solve_radiation(int argc, char** argv)
207211
// Read the atmospheric fields.
208212
const Array<Float,2> tot_tau(input_nc.get_variable<Float>("tot_tau", {n_lay, ny, nx}), {ncol, n_lay});
209213
const Array<Float,2> tot_ssa(input_nc.get_variable<Float>("tot_ssa", {n_lay, ny, nx}), {ncol, n_lay});
214+
const Array<Float,2> tot_asy(input_nc.get_variable<Float>("tot_asy", {n_lay, ny, nx}), {ncol, n_lay});
210215

211216
Array<Float,2> cld_tau({ncol, n_lay});
212217
Array<Float,2> cld_ssa({ncol, n_lay});
@@ -225,13 +230,16 @@ void solve_radiation(int argc, char** argv)
225230
aer_asy = std::move(input_nc.get_variable<Float>("aer_asy", {n_lay, ny, nx}));
226231

227232
// read albedo, solar angles, and top-of-domain fluxes
228-
Array<Float,2> sfc_albedo({1,ncol});
229-
sfc_albedo.fill(input_nc.get_variable<Float>("albedo"));
233+
Array<Float,2> sfc_albedo({ncol, 1});
234+
sfc_albedo = std::move(input_nc.get_variable<Float>("albedo", {ny, nx}));
235+
230236
const Float zenith_angle = input_nc.get_variable<Float>("sza");
231237
const Float azimuth_angle = input_nc.get_variable<Float>("azi");
232238
const Float tod_dir = input_nc.get_variable<Float>("tod_direct");
233239

234-
240+
Array<Float,1> mu0({ncol});
241+
mu0.fill(cos(zenith_angle));
242+
235243
Camera camera;
236244
if (switch_bw_raytracing)
237245
{
@@ -267,6 +275,10 @@ void solve_radiation(int argc, char** argv)
267275
Array_gpu<Float,3> flux_abs_dif({nx, ny, nz});
268276
Array_gpu<Float,2> radiance({camera.nx, camera.ny});
269277

278+
Array_gpu<Float,2> flux_dn_2stream;
279+
Array_gpu<Float,2> flux_up_2stream;
280+
Array_gpu<Float,2> flux_dn_dir_2stream;
281+
270282
// empty arrays (mie scattering not (yet) supported in lite version)
271283
Array<Float,2> mie_cdfs_c;
272284
Array<Float,3> mie_angs_c;
@@ -317,34 +329,69 @@ void solve_radiation(int argc, char** argv)
317329
lum_c.fill(Float(0.));
318330
Array_gpu<Float,1> land_use_map(lum_c);
319331

320-
332+
//// GPU arrays
333+
Array_gpu<Float,2> tot_tau_g(tot_tau);
334+
Array_gpu<Float,2> tot_ssa_g(tot_ssa);
335+
Array_gpu<Float,2> tot_asy_g(tot_asy);
336+
Array_gpu<Float,2> cld_tau_g(cld_tau);
337+
Array_gpu<Float,2> cld_ssa_g(cld_ssa);
338+
Array_gpu<Float,2> cld_asy_g(cld_asy);
339+
Array_gpu<Float,2> aer_tau_g(aer_tau);
340+
Array_gpu<Float,2> aer_ssa_g(aer_ssa);
341+
Array_gpu<Float,2> aer_asy_g(aer_asy);
342+
Array_gpu<Float,2> sfc_albedo_g(sfc_albedo);
343+
Array_gpu<Float,1> mu0_g(mu0);
344+
321345
////// CREATE THE OUTPUT FILE //////
322346
// Create the general dimensions and arrays.
323347
Status::print_message("Preparing NetCDF output file.");
324348

325349
Netcdf_file output_nc("rt_lite_output.nc", Netcdf_mode::Create);
326-
if (switch_raytracing)
350+
if (switch_raytracing || switch_two_stream)
327351
{
328352
output_nc.add_dimension("x", nx);
329353
output_nc.add_dimension("y", ny);
330-
output_nc.add_dimension("z", n_z_in);
354+
output_nc.add_dimension("z", nz);
355+
output_nc.add_dimension("lev", n_lay+1);
331356
}
332357
if (switch_bw_raytracing)
333358
{
334359
output_nc.add_dimension("nx", camera.nx);
335360
output_nc.add_dimension("ny", camera.ny);
336361
}
337362

338-
//// GPU arrays
339-
Array_gpu<Float,2> tot_tau_g(tot_tau);
340-
Array_gpu<Float,2> tot_ssa_g(tot_ssa);
341-
Array_gpu<Float,2> cld_tau_g(cld_tau);
342-
Array_gpu<Float,2> cld_ssa_g(cld_ssa);
343-
Array_gpu<Float,2> cld_asy_g(cld_asy);
344-
Array_gpu<Float,2> aer_tau_g(aer_tau);
345-
Array_gpu<Float,2> aer_ssa_g(aer_ssa);
346-
Array_gpu<Float,2> aer_asy_g(aer_asy);
347-
Array_gpu<Float,2> sfc_albedo_g(sfc_albedo);
363+
364+
if (switch_two_stream)
365+
{
366+
flux_up_2stream.set_dims({ncol, n_lay+1});
367+
flux_dn_2stream.set_dims({ncol, n_lay+1});
368+
flux_dn_dir_2stream.set_dims({ncol, n_lay+1});
369+
370+
Rte_sw_rt rte_sw;
371+
Rte_solver_kernels_cuda_rt::apply_BC(ncol, n_lay, 1, 0, tod_dir * cos(zenith_angle), flux_dn_dir_2stream.ptr());
372+
Rte_solver_kernels_cuda_rt::apply_BC(ncol, n_lay, 1, 0, flux_dn_2stream.ptr());
373+
374+
Rte_solver_kernels_cuda_rt::sw_solver_2stream(
375+
ncol, n_lay, 1, 0,
376+
tot_tau_g.ptr(),
377+
tot_ssa_g.ptr(),
378+
tot_asy_g.ptr(),
379+
mu0_g.ptr(),
380+
sfc_albedo_g.ptr(), sfc_albedo_g.ptr(),
381+
flux_up_2stream.ptr(), flux_dn_2stream.ptr(), flux_dn_dir_2stream.ptr());
382+
383+
Array<Float,2> flux_up_2stream_c(flux_up_2stream);
384+
Array<Float,2> flux_dn_2stream_c(flux_dn_2stream);
385+
Array<Float,2> flux_dn_dir_2stream_c(flux_dn_dir_2stream);
386+
387+
auto nc_up_2stream = output_nc.add_variable<Float>("flux_up_2stream" , {"lev", "y", "x"});
388+
auto nc_dn_2stream = output_nc.add_variable<Float>("flux_dn_2stream" , {"lev", "y", "x"});
389+
auto nc_dn_dir_2stream = output_nc.add_variable<Float>("flux_dn_dir_2stream" , {"lev", "y", "x"});
390+
391+
nc_up_2stream.insert(flux_up_2stream_c .v(), {0, 0, 0});
392+
nc_dn_2stream.insert(flux_dn_2stream_c .v(), {0, 0, 0});
393+
nc_dn_dir_2stream.insert(flux_dn_dir_2stream_c .v(), {0, 0, 0});
394+
}
348395

349396
if (switch_raytracing)
350397
{

0 commit comments

Comments
 (0)