Skip to content

Commit 8ae62c4

Browse files
ReduceOps.eval with BoxND (#4875)
## Summary

This PR enables the use of `ReduceOps.eval()` with a BoxND of any dimension. This is achieved using BoxIndexerND, just like ParallelForRNG, which also provides 64-bit indexing to support very large boxes. There was an issue where the extra size added by BoxIndexerND pushed an EB kernel over the parameter size limit for SYCL. This was fixed by using MultiArray4 for some of the arrays instead.

```
error: Total size of kernel arguments exceeds limit! Total arguments size: 2064, limit: 2048 in kernel: 'typeinfo name for void amrex::launch<256, void amrex::ReduceOps<amrex::ReduceOpMax>::eval<amrex::ReduceData<int>, amrex::EB2::Level::coarsenFromFine(amrex::EB2::Level&, bool)::'lambda4'(int, int, int), 3>(amrex::BoxND<3> const&, amrex::ReduceData<int>&, amrex::EB2::Level::coarsenFromFine(amrex::EB2::Level&, bool)::'lambda4'(int, int, int) const&)::'lambda'(amrex::Gpu::Handler const&)>(int, unsigned long, amrex::gpuStream_t, amrex::EB2::Level::coarsenFromFine(amrex::EB2::Level&, bool)::'lambda4'(int, int, int) const&)::'lambda'(sycl::_V1::handler&)::operator()(sycl::_V1::handler&) const::'lambda'(sycl::_V1::nd_item<1>)'
error: backend compiler failed build.
```

https://github.com/AMReX-Codes/amrex/blob/1fe7883af64291e5c67d0d4ce65eb2e758dab412/Src/EB/AMReX_EB2_Level.cpp#L364-L379
1 parent 8c41360 commit 8ae62c4

File tree

4 files changed

+159
-144
lines changed

4 files changed

+159
-144
lines changed

Src/Base/AMReX_GpuLaunchFunctsC.H

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,15 @@ namespace detail {
3232
auto call_f_scalar_handler (F const& f, N i)
3333
noexcept -> decltype(f(0))
3434
{
35-
f(i);
35+
return f(i);
3636
}
3737

3838
template <typename F, typename N>
3939
AMREX_FORCE_INLINE
4040
auto call_f_scalar_handler (F const& f, N i)
4141
noexcept -> decltype(f(0,Gpu::Handler{}))
4242
{
43-
f(i, Gpu::Handler{});
43+
return f(i, Gpu::Handler{});
4444
}
4545

4646
// call_f_intvect_inner
@@ -50,31 +50,31 @@ namespace detail {
5050
auto call_f_intvect_inner (std::index_sequence<Ns...>, F const& f, IntVectND<1> iv, Args...args)
5151
noexcept -> decltype(f(0, 0, 0, args...))
5252
{
53-
f(iv[0], 0, 0, args...);
53+
return f(iv[0], 0, 0, args...);
5454
}
5555

5656
template <typename F, std::size_t...Ns, class...Args>
5757
AMREX_FORCE_INLINE
5858
auto call_f_intvect_inner (std::index_sequence<Ns...>, F const& f, IntVectND<2> iv, Args...args)
5959
noexcept -> decltype(f(0, 0, 0, args...))
6060
{
61-
f(iv[0], iv[1], 0, args...);
61+
return f(iv[0], iv[1], 0, args...);
6262
}
6363

6464
template <typename F, int dim, std::size_t...Ns, class...Args>
6565
AMREX_FORCE_INLINE
6666
auto call_f_intvect_inner (std::index_sequence<Ns...>, F const& f, IntVectND<dim> iv, Args...args)
6767
noexcept -> decltype(f(iv, args...))
6868
{
69-
f(iv, args...);
69+
return f(iv, args...);
7070
}
7171

7272
template <typename F, int dim, std::size_t...Ns, class...Args>
7373
AMREX_FORCE_INLINE
7474
auto call_f_intvect_inner (std::index_sequence<Ns...>, F const& f, IntVectND<dim> iv, Args...args)
7575
noexcept -> decltype(f(iv[Ns]..., args...))
7676
{
77-
f(iv[Ns]..., args...);
77+
return f(iv[Ns]..., args...);
7878
}
7979

8080
// call_f_intvect_engine
@@ -84,7 +84,7 @@ namespace detail {
8484
auto call_f_intvect_engine (F const& f, IntVectND<dim> iv, RandomEngine engine)
8585
noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, engine))
8686
{
87-
call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, engine);
87+
return call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, engine);
8888
}
8989

9090
// call_f_intvect_handler
@@ -94,15 +94,15 @@ namespace detail {
9494
auto call_f_intvect_handler (F const& f, IntVectND<dim> iv)
9595
noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv))
9696
{
97-
call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv);
97+
return call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv);
9898
}
9999

100100
template <typename F, int dim>
101101
AMREX_FORCE_INLINE
102102
auto call_f_intvect_handler (F const& f, IntVectND<dim> iv)
103103
noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, Gpu::Handler{}))
104104
{
105-
call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, Gpu::Handler{});
105+
return call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, Gpu::Handler{});
106106
}
107107

108108
// call_f_intvect_ncomp_engine
@@ -112,7 +112,7 @@ namespace detail {
112112
auto call_f_intvect_ncomp_engine (F const& f, IntVectND<dim> iv, T n, RandomEngine engine)
113113
noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, n, engine))
114114
{
115-
call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, n, engine);
115+
return call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, n, engine);
116116
}
117117

118118
// call_f_intvect_ncomp_handler
@@ -122,15 +122,15 @@ namespace detail {
122122
auto call_f_intvect_ncomp_handler (F const& f, IntVectND<dim> iv, T n)
123123
noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, n))
124124
{
125-
call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, n);
125+
return call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, n);
126126
}
127127

128128
template <typename F, typename T, int dim>
129129
AMREX_FORCE_INLINE
130130
auto call_f_intvect_ncomp_handler (F const& f, IntVectND<dim> iv, T n)
131131
noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, n, Gpu::Handler{}))
132132
{
133-
call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, n, Gpu::Handler{});
133+
return call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, n, Gpu::Handler{});
134134
}
135135

136136
}

Src/Base/AMReX_GpuLaunchFunctsG.H

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@ namespace detail {
1414
auto call_f_scalar_handler (F const& f, N i, Gpu::Handler const&)
1515
noexcept -> decltype(f(0))
1616
{
17-
f(i);
17+
return f(i);
1818
}
1919

2020
template <typename F, typename N>
2121
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
2222
auto call_f_scalar_handler (F const& f, N i, Gpu::Handler const& handler)
2323
noexcept -> decltype(f(0,Gpu::Handler{}))
2424
{
25-
f(i, handler);
25+
return f(i, handler);
2626
}
2727

2828
// call_f_intvect_inner
@@ -32,31 +32,31 @@ namespace detail {
3232
auto call_f_intvect_inner (std::index_sequence<Ns...>, F const& f, IntVectND<1> iv, Args...args)
3333
noexcept -> decltype(f(0, 0, 0, args...))
3434
{
35-
f(iv[0], 0, 0, args...);
35+
return f(iv[0], 0, 0, args...);
3636
}
3737

3838
template <typename F, std::size_t...Ns, class...Args>
3939
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
4040
auto call_f_intvect_inner (std::index_sequence<Ns...>, F const& f, IntVectND<2> iv, Args...args)
4141
noexcept -> decltype(f(0, 0, 0, args...))
4242
{
43-
f(iv[0], iv[1], 0, args...);
43+
return f(iv[0], iv[1], 0, args...);
4444
}
4545

4646
template <typename F, int dim, std::size_t...Ns, class...Args>
4747
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
4848
auto call_f_intvect_inner (std::index_sequence<Ns...>, F const& f, IntVectND<dim> iv, Args...args)
4949
noexcept -> decltype(f(iv, args...))
5050
{
51-
f(iv, args...);
51+
return f(iv, args...);
5252
}
5353

5454
template <typename F, int dim, std::size_t...Ns, class...Args>
5555
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
5656
auto call_f_intvect_inner (std::index_sequence<Ns...>, F const& f, IntVectND<dim> iv, Args...args)
5757
noexcept -> decltype(f(iv[Ns]..., args...))
5858
{
59-
f(iv[Ns]..., args...);
59+
return f(iv[Ns]..., args...);
6060
}
6161

6262
// call_f_intvect
@@ -66,7 +66,7 @@ namespace detail {
6666
auto call_f_intvect (F const& f, IntVectND<dim> iv)
6767
noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv))
6868
{
69-
call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv);
69+
return call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv);
7070
}
7171

7272
// call_f_intvect_engine
@@ -76,7 +76,7 @@ namespace detail {
7676
auto call_f_intvect_engine (F const& f, IntVectND<dim> iv, RandomEngine engine)
7777
noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, engine))
7878
{
79-
call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, engine);
79+
return call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, engine);
8080
}
8181

8282
// call_f_intvect_handler
@@ -86,15 +86,15 @@ namespace detail {
8686
auto call_f_intvect_handler (F const& f, IntVectND<dim> iv, Gpu::Handler const&)
8787
noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv))
8888
{
89-
call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv);
89+
return call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv);
9090
}
9191

9292
template <typename F, int dim>
9393
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
9494
auto call_f_intvect_handler (F const& f, IntVectND<dim> iv, Gpu::Handler const& handler)
9595
noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, Gpu::Handler{}))
9696
{
97-
call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, handler);
97+
return call_f_intvect_inner(std::make_index_sequence<dim>(), f, iv, handler);
9898
}
9999

100100
// call_f_intvect_ncomp

0 commit comments

Comments
 (0)