@@ -157,6 +157,9 @@ double forward(f_type *u, f_type *velocity, f_type *damp,
157157 f_type sum_z = coeff [0 ] * u [current_snapshot ];
158158
159159 // radius of the stencil
160+ #ifdef GPU_OPENACC
161+ #pragma acc loop seq
162+ #endif
160163 for (size_t ir = 1 ; ir <= stencil_radius ; ir ++ ){
161164 //neighbors in the Y direction
162165 sum_y += coeff [ir ] * (u [current_snapshot + ir ] + u [current_snapshot - ir ]);
@@ -235,15 +238,24 @@ double forward(f_type *u, f_type *velocity, f_type *damp,
235238 size_t kws_index_z = offset_src_kws_index_z ;
236239
237240 // for each source point in the Z axis
241+ #ifdef GPU_OPENACC
242+ #pragma acc loop seq
243+ #endif
238244 for (size_t i = src_z_begin ; i <= src_z_end ; i ++ ){
239245 size_t kws_index_x = offset_src_kws_index_z + src_z_num_points ;
240246
241247 // for each source point in the X axis
248+ #ifdef GPU_OPENACC
249+ #pragma acc loop seq
250+ #endif
242251 for (size_t j = src_x_begin ; j <= src_x_end ; j ++ ){
243252
244253 size_t kws_index_y = offset_src_kws_index_z + src_z_num_points + src_x_num_points ;
245254
246255 // for each source point in the Y axis
256+ #ifdef GPU_OPENACC
257+ #pragma acc loop seq
258+ #endif
247259 for (size_t k = src_y_begin ; k <= src_y_end ; k ++ ){
248260
249261 f_type kws = src_points_values [kws_index_z ] * src_points_values [kws_index_x ] * src_points_values [kws_index_y ];
@@ -311,6 +323,9 @@ double forward(f_type *u, f_type *velocity, f_type *damp,
311323
312324 // null neumann on the left
313325 if (y_before == 2 ){
326+ #ifdef GPU_OPENACC
327+ #pragma acc loop seq
328+ #endif
314329 for (size_t ir = 1 ; ir <= stencil_radius ; ir ++ ){
315330 size_t domain_offset = (i * nx + j ) * ny + stencil_radius ;
316331 size_t next_snapshot = next_t * domain_size + domain_offset ;
@@ -327,6 +342,9 @@ double forward(f_type *u, f_type *velocity, f_type *damp,
327342
328343 // null neumann on the right
329344 if (y_after == 2 ){
345+ #ifdef GPU_OPENACC
346+ #pragma acc loop seq
347+ #endif
330348 for (size_t ir = 1 ; ir <= stencil_radius ; ir ++ ){
331349 size_t domain_offset = (i * nx + j ) * ny + (ny - stencil_radius - 1 );
332350 size_t next_snapshot = next_t * domain_size + domain_offset ;
@@ -362,6 +380,9 @@ double forward(f_type *u, f_type *velocity, f_type *damp,
362380
363381 // null neumann on the front
364382 if (x_before == 2 ){
383+ #ifdef GPU_OPENACC
384+ #pragma acc loop seq
385+ #endif
365386 for (size_t ir = 1 ; ir <= stencil_radius ; ir ++ ){
366387 size_t domain_offset = (i * nx + stencil_radius ) * ny + k ;
367388 size_t next_snapshot = next_t * domain_size + domain_offset ;
@@ -378,6 +399,9 @@ double forward(f_type *u, f_type *velocity, f_type *damp,
378399
379400 // null neumann on the back
380401 if (x_after == 2 ){
402+ #ifdef GPU_OPENACC
403+ #pragma acc loop seq
404+ #endif
381405 for (size_t ir = 1 ; ir <= stencil_radius ; ir ++ ){
382406 size_t domain_offset = (i * nx + (nx - stencil_radius - 1 )) * ny + k ;
383407 size_t next_snapshot = next_t * domain_size + domain_offset ;
@@ -413,6 +437,9 @@ double forward(f_type *u, f_type *velocity, f_type *damp,
413437
414438 // null neumann on the top
415439 if (z_before == 2 ){
440+ #ifdef GPU_OPENACC
441+ #pragma acc loop seq
442+ #endif
416443 for (size_t ir = 1 ; ir <= stencil_radius ; ir ++ ){
417444 size_t domain_offset = (stencil_radius * nx + j ) * ny + k ;
418445 size_t next_snapshot = next_t * domain_size + domain_offset ;
@@ -429,6 +456,9 @@ double forward(f_type *u, f_type *velocity, f_type *damp,
429456
430457 // null neumann on the bottom
431458 if (z_after == 2 ){
459+ #ifdef GPU_OPENACC
460+ #pragma acc loop seq
461+ #endif
432462 for (size_t ir = 1 ; ir <= stencil_radius ; ir ++ ){
433463 size_t domain_offset = ((nz - stencil_radius - 1 ) * nx + j ) * ny + k ;
434464 size_t next_snapshot = next_t * domain_size + domain_offset ;
@@ -487,15 +517,24 @@ double forward(f_type *u, f_type *velocity, f_type *damp,
487517 size_t kws_index_z = offset_rec_kws_index_z ;
488518
489519 // for each receiver point in the Z axis
520+ #ifdef GPU_OPENACC
521+ #pragma acc loop seq
522+ #endif
490523 for (size_t i = rec_z_begin ; i <= rec_z_end ; i ++ ){
491524 size_t kws_index_x = offset_rec_kws_index_z + rec_z_num_points ;
492525
493526 // for each receiver point in the X axis
527+ #ifdef GPU_OPENACC
528+ #pragma acc loop seq
529+ #endif
494530 for (size_t j = rec_x_begin ; j <= rec_x_end ; j ++ ){
495531
496532 size_t kws_index_y = offset_rec_kws_index_z + rec_z_num_points + rec_x_num_points ;
497533
498534 // for each receiver point in the Y axis
535+ #ifdef GPU_OPENACC
536+ #pragma acc loop seq
537+ #endif
499538 for (size_t k = rec_y_begin ; k <= rec_y_end ; k ++ ){
500539
501540 f_type kws = rec_points_values [kws_index_z ] * rec_points_values [kws_index_x ] * rec_points_values [kws_index_y ];
0 commit comments