Skip to content

Commit aa73645

Browse files
mlee03mlee03
authored andcommitted
interp_gpu
1 parent ffa6ce6 commit aa73645

File tree

11 files changed

+240
-106
lines changed

11 files changed

+240
-106
lines changed

Makefile.am

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
ACLOCAL_AMFLAGS = -I m4
2121

2222
SUBDIRS = lib/libnfu \
23-
src \
2423
tools/libfrencutils \
24+
tools/libfrencutils_gpu \
2525
tools/check_mask \
2626
tools/cubic_utils \
2727
tools/fregrid \
@@ -45,15 +45,10 @@ SUBDIRS = lib/libnfu \
4545
tools/simple_hydrog/lakes \
4646
tools/simple_hydrog/rmvpr \
4747
tools/nc_null_check
48-
49-
SUBDIRS += man \
50-
tests
51-
52-
if ENABLE_GPU
53-
SUBDIRS += tools/libfrencutils_gpu
48+
SUBDIRS += tests
5449
SUBDIRS += tools/fregrid_gpu
5550
SUBDIRS += t_gpu
56-
endif
51+
5752

5853
# Shortcut targets to make it easier to run (very) expensive tests.
5954
check-expensive:

configure.ac

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,8 +254,6 @@ AC_SUBST(RPATH_FLAGS) dnl
254254
AC_CONFIG_HEADERS([config.h])
255255
AC_CONFIG_FILES([Makefile
256256
lib/libnfu/Makefile
257-
man/Makefile
258-
src/Makefile
259257
tests/Makefile
260258
tools/libfrencutils/Makefile
261259
tools/libfrencutils_gpu/Makefile

tools/fregrid_gpu/conserve_interp_gpu.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ void setup_conserve_interp_gpu(int ntiles_input_grid, Grid_config *input_grid, i
6060

6161
copy_grid_to_device_gpu(ngridpts_output_grid, output_grid[otile].latc, output_grid[otile].lonc);
6262

63-
get_grid_cell_struct_gpu( nlon_output_cells, nlat_output_cells, output_grid+otile, &output_grid_cells );
63+
get_grid_cell_struct_gpu( nlon_output_cells, nlat_output_cells, output_grid[otile].latc,
64+
output_grid[otile].lonc, &output_grid_cells );
6465

6566
for(int itile=0; itile<ntiles_input_grid; itile++){
6667

tools/libfrencutils_gpu/Makefile.am

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ libfrencutils_gpu_a_SOURCES = create_xgrid_gpu.c \
2525
create_xgrid_gpu.h \
2626
create_xgrid_utils_gpu.c \
2727
create_xgrid_utils_gpu.h \
28+
interp_gpu.c \
29+
interp_gpu.h \
2830
general_utils_gpu.c \
2931
general_utils_gpu.h
3032

tools/libfrencutils_gpu/create_xgrid_gpu.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ int get_upbound_nxcells_2dx2d_gpu(const int nlon_input_cells, const int nlat_in
6363
output_grid_ncells)
6464
#pragma acc data copy(upbound_nxcells)
6565
#pragma acc parallel loop independent reduction(+:upbound_nxcells)
66-
for( int ij1=ij1_start ; ij1<ij1_end ; ij1++) {
66+
for( int ij1=0 ; ij1<input_grid_ncells ; ij1++) {
6767
if( skip_input_cells[ij1] > MASK_THRESH ) {
6868

6969
int i_approx_xcells_per_ij1=0;
@@ -115,10 +115,10 @@ int get_upbound_nxcells_2dx2d_gpu(const int nlon_input_cells, const int nlat_in
115115
ij2_max = max(ij2_max, ij2);
116116

117117
} //ij2
118+
118119
approx_xcells_per_ij1[ij1] = i_approx_xcells_per_ij1;
119-
ij2_start[ij1] = ij2_min ;
120+
ij2_start[ij1] = ij2_min;
120121
ij2_end[ij1] = ij2_max;
121-
122122
} //mask
123123
} //ij1
124124

@@ -183,7 +183,7 @@ int create_xgrid_2dx2d_order1_gpu(const int nlon_input_cells, const int nlat_in
183183
copyin(input_grid_ncells, output_grid_ncells) \
184184
copy(nxcells)
185185
#pragma acc parallel loop reduction(+:nxcells)
186-
for(int ij1=ij1_start; ij1<ij1_end; ij1++) {
186+
for(int ij1=0; ij1<input_grid_ncells; ij1++) {
187187
if(mask_input_grid[ij1] > MASK_THRESH) {
188188

189189
double input_cell_lon_vertices[MV], input_cell_lat_vertices[MV];

tools/libfrencutils_gpu/create_xgrid_utils_gpu.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,8 @@ void get_grid_area_gpu(const int *nlon, const int *nlat, const double *lon, cons
5555

5656
void get_grid_great_circle_area_gpu(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area)
5757
{
58-
int nx, ny, nxp, nyp, i, j, n_in;
58+
int nx, ny, nxp, nyp, i, j;
5959
int n0, n1, n2, n3;
60-
double x_in[20], y_in[20], z_in[20];
6160
struct Node_gpu *grid=NULL;
6261
double *x=NULL, *y=NULL, *z=NULL;
6362

@@ -643,13 +642,10 @@ int clip_2dx2d_great_circle_gpu(const double x1_in[], const double y1_in[], cons
643642
return n_out;
644643
}
645644

646-
void get_grid_cell_struct_gpu( const int nlon, const int nlat, const Grid_config *output_grid,
645+
void get_grid_cell_struct_gpu( const int nlon, const int nlat, double *lon, double *lat,
647646
Grid_cells_struct_config *grid_cells )
648647
{
649648

650-
double *lon = output_grid->lonc;
651-
double *lat = output_grid->latc;
652-
653649
int ncells=nlon*nlat;
654650
int npts=(nlon+1)*(nlat+1);
655651

tools/libfrencutils_gpu/create_xgrid_utils_gpu.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ int clip_2dx2d_great_circle_gpu(const double x1_in[], const double y1_in[], cons
4444
const double x2_in[], const double y2_in[], const double z2_in [], int n2_in,
4545
double x_out[], double y_out[], double z_out[]);
4646

47-
void get_grid_cell_struct_gpu( const int nlon, const int nlat, const Grid_config *output_grid,
48-
Grid_cells_struct_config *grid_cells);
47+
void get_grid_cell_struct_gpu( const int nlon, const int nlat, double *lon, double *lat,
48+
Grid_cells_struct_config *grid_cells);
4949

5050
void free_grid_cell_struct_gpu( const int ncells, Grid_cells_struct_config *grid_cells);
5151

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/***********************************************************************
2+
* GNU Lesser General Public License
3+
*
4+
* This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools).
5+
*
6+
* FRE-NCtools is free software: you can redistribute it and/or modify it under
7+
* the terms of the GNU Lesser General Public License as published by
8+
* the Free Software Foundation, either version 3 of the License, or (at
9+
* your option) any later version.
10+
*
11+
* FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT
12+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14+
* for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public
17+
* License along with FRE-NCTools. If not, see
18+
* <http://www.gnu.org/licenses/>.
19+
**********************************************************************/
20+
/*
21+
Copyright (C) 2011 NOAA Geophysical Fluid Dynamics Lab, Princeton, NJ
22+
*/
23+
#include <stdio.h>
24+
#include <stdlib.h>
25+
#include <math.h>
26+
#include <openacc.h>
27+
#include "mosaic_util.h"
28+
#include "interp_gpu.h"
29+
#include "interp.h"
30+
#include "create_xgrid.h"
31+
#include "create_xgrid_gpu.h"
32+
#include "create_xgrid_utils_gpu.h"
33+
34+
/*------------------------------------------------------------------------------
35+
void conserve_interp_gpu()
36+
conservative interpolation through exchange grid.
37+
Currently only first-order interpolation is implemented here.
38+
----------------------------------------------------------------------------*/
39+
void conserve_interp_gpu(int nx_src, int ny_src, int nx_dst, int ny_dst, double *x_src,
40+
double *y_src, double *x_dst, double *y_dst,
41+
double *mask_src, double *data_src, double *data_dst )
42+
{
43+
Grid_cells_struct_config output_grid_cells;
44+
Interp_per_input_tile interp_gpu;
45+
46+
int ncells_src = nx_src * ny_src;
47+
int ncells_dst = nx_dst * ny_dst;
48+
int ngridpts_src = (nx_src+1) * (ny_src+1);
49+
int ngridpts_dst = (nx_dst+1) * (ny_dst+1);
50+
int jstart = 0;
51+
int jend = ny_src-1;
52+
53+
int *approx_nxcells; approx_nxcells = (int *)malloc(ncells_src*sizeof(int));
54+
int *ij2_start; ij2_start = (int *)malloc(ncells_src*sizeof(int));
55+
int *ij2_end; ij2_end = (int *)malloc(ncells_src*sizeof(int));
56+
57+
#pragma acc enter data copyin(x_src[:ngridpts_src], y_src[:ngridpts_src], \
58+
x_dst[:ngridpts_dst], y_dst[:ngridpts_dst],\
59+
mask_src[:ncells_src])
60+
61+
get_grid_cell_struct_gpu(nx_dst, ny_dst, x_dst, y_dst, &output_grid_cells);
62+
63+
#pragma acc enter data create(approx_nxcells[:ncells_src], ij2_start[:ncells_src], ij2_end[:ncells_src])
64+
65+
int upbound_nxcells = get_upbound_nxcells_2dx2d_gpu(nx_src, ny_src, nx_dst, ny_dst, jstart, jend,
66+
x_src, y_src, x_dst, y_dst, mask_src, &output_grid_cells,
67+
approx_nxcells, ij2_start, ij2_end);
68+
69+
int nxgrid = create_xgrid_2dx2d_order1_gpu(nx_src, ny_src, nx_dst, ny_dst, jstart, jend, x_src, y_src,
70+
x_dst, y_dst, upbound_nxcells, mask_src, &output_grid_cells,
71+
approx_nxcells, ij2_start, ij2_end, &interp_gpu);
72+
73+
#pragma acc exit data copyout(interp_gpu.input_parent_cell_index[:nxgrid], \
74+
interp_gpu.output_parent_cell_index[:nxgrid], \
75+
interp_gpu.xcell_area[:nxgrid])
76+
77+
int *xgrid_ij1 = interp_gpu.input_parent_cell_index;
78+
int *xgrid_ij2 = interp_gpu.output_parent_cell_index;
79+
double *xgrid_area = interp_gpu.xcell_area;
80+
double *dst_area; dst_area = (double *)malloc(nx_dst*ny_dst*sizeof(double));
81+
82+
for(int n=0; n<nx_dst*ny_dst; n++) {
83+
dst_area[n] = 0.0;
84+
data_dst[n] = 0.0;
85+
}
86+
87+
/* The source grid may not cover the destination grid
88+
so need to sum of exchange grid area to get dst_area
89+
get_grid_area(&nx_dst, &ny_dst, x_dst, y_dst, dst_area);
90+
*/
91+
92+
for(int n=0; n<nxgrid; n++) dst_area[xgrid_ij2[n]] += xgrid_area[n];
93+
for(int n=0; n<nxgrid; n++) {
94+
int ij2 = xgrid_ij2[n];
95+
int ij1 = xgrid_ij1[n];
96+
double area_frac = xgrid_area[n]/dst_area[ij2];
97+
data_dst[ij2] += data_src[ij1]*area_frac;
98+
}
99+
100+
free(xgrid_ij1);
101+
free(xgrid_ij2);
102+
free(xgrid_area);
103+
free(approx_nxcells);
104+
free(ij2_start);
105+
free(ij2_end);
106+
107+
}; /* conserve_interp */
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/***********************************************************************
2+
* GNU Lesser General Public License
3+
*
4+
* This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools).
5+
*
6+
* FRE-NCtools is free software: you can redistribute it and/or modify it under
7+
* the terms of the GNU Lesser General Public License as published by
8+
* the Free Software Foundation, either version 3 of the License, or (at
9+
* your option) any later version.
10+
*
11+
* FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT
12+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14+
* for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public
17+
* License along with FRE-NCTools. If not, see
18+
* <http://www.gnu.org/licenses/>.
19+
**********************************************************************/
20+
/*
21+
Copyright (C) 2011 NOAA Geophysical Fluid Dynamics Lab, Princeton, NJ
22+
*/
23+
/* Guard macro is specific to this header. interp_gpu.c includes both
   "interp_gpu.h" and "interp.h"; NOTE(review): interp.h presumably uses
   INTERP_H_ as its guard, in which case reusing that name here would make
   the second include expand to nothing -- confirm against interp.h. */
#ifndef INTERP_GPU_H_
#define INTERP_GPU_H_
/*********************************************************************
                     interp_gpu.h
   This header file contains the declaration of the GPU (OpenACC)
   conservative interpolation routine.
   contact: Zhi.Liang@noaa.gov
*********************************************************************/

/*
 * First-order conservative interpolation from a source grid to a
 * destination grid through an exchange grid.
 *
 *   nx_src, ny_src : number of source cells in each direction
 *   nx_dst, ny_dst : number of destination cells in each direction
 *   x_src,  y_src  : source grid vertices, (nx_src+1)*(ny_src+1) values each
 *   x_dst,  y_dst  : destination grid vertices, (nx_dst+1)*(ny_dst+1) values each
 *   mask_src       : source mask, nx_src*ny_src values
 *   data_src       : source field to interpolate
 *   data_dst       : output field, nx_dst*ny_dst values (overwritten)
 */
void conserve_interp_gpu(int nx_src, int ny_src, int nx_dst, int ny_dst, double *x_src,
                         double *y_src, double *x_dst, double *y_dst,
                         double *mask_src, double *data_src, double *data_dst );
#endif

tools/make_topog/Makefile.am

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,16 @@ if WITH_MPI
2222
bin_PROGRAMS += make_topog_parallel
2323
endif
2424

25-
AM_CFLAGS = -I$(top_srcdir)/tools/libfrencutils \
26-
$(NETCDF_CFLAGS)
25+
AM_CFLAGS = -I$(top_srcdir)/tools/libfrencutils $(OPENACC_CFLAGS) \
26+
$(NETCDF_CFLAGS) -I$(top_srcdir)/tools/libfrencutils_gpu
2727
LDADD = $(NETCDF_LDFLAGS) $(NETCDF_LIBS) $(RPATH_FLAGS)
2828

2929
make_topog_SOURCES = make_topog.c \
3030
topog.c \
3131
topog.h
32-
make_topog_LDADD = $(top_builddir)/tools/libfrencutils/libfrencutils.a $(LDADD)
32+
make_topog_LDADD = $(top_builddir)/tools/libfrencutils/libfrencutils.a \
33+
$(top_builddir)/tools/libfrencutils_gpu/libfrencutils_gpu.a \
34+
$(LDADD)
3335

3436
make_topog_parallel_SOURCES = $(make_topog_SOURCES)
3537
make_topog_parallel_CFLAGS = -Duse_libMPI $(MPI_CFLAGS) $(AM_CFLAGS)

0 commit comments

Comments
 (0)