@@ -29,12 +29,16 @@ class MeshFieldBackendImpl : public MeshFieldBackend
2929 {
3030 size_t stride = num_nodes * num_components;
3131 auto topo = static_cast <MeshField::Mesh_Topology>(dim);
32+ Kokkos::View<Real*, DefaultExecutionSpace::memory_space> data_d (
33+ " data_d" , data.size ());
34+ Kokkos::deep_copy (data_d, Kokkos::View<const Real*, HostMemorySpace>(
35+ data.data_handle (), data.size ()));
3236 Omega_h::parallel_for (
3337 mesh_.nents (dim), OMEGA_H_LAMBDA (size_t ent) {
3438 for (size_t n = 0 ; n < num_nodes; ++n) {
3539 for (size_t c = 0 ; c < num_components; ++c) {
3640 shape_field_ (ent, n, c, topo) =
37- data [ent * stride + n * num_components + c];
41+ data_d [ent * stride + n * num_components + c];
3842 }
3943 }
4044 });
@@ -45,15 +49,20 @@ class MeshFieldBackendImpl : public MeshFieldBackend
4549 {
4650 size_t stride = num_nodes * num_components;
4751 auto topo = static_cast <MeshField::Mesh_Topology>(dim);
52+ Kokkos::View<Real*, DefaultExecutionSpace::memory_space> data_d (
53+ " data_d" , data.size ());
4854 Omega_h::parallel_for (
4955 mesh_.nents (dim), OMEGA_H_LAMBDA (size_t ent) {
5056 for (size_t n = 0 ; n < num_nodes; ++n) {
5157 for (size_t c = 0 ; c < num_components; ++c) {
52- data [ent * stride + n * num_components + c] =
58+ data_d [ent * stride + n * num_components + c] =
5359 shape_field_ (ent, n, c, topo);
5460 }
5561 }
5662 });
63+ Kokkos::deep_copy (
64+ Kokkos::View<Real*, HostMemorySpace>(data.data_handle (), data.size ()),
65+ data_d);
5766 }
5867
5968private:
@@ -64,31 +73,110 @@ class MeshFieldBackendImpl : public MeshFieldBackend
6473 ShapeField shape_field_;
6574};
6675
76+ struct ComputeOffsetsFunctor
77+ {
78+ Kokkos::View<LO*, HostMemorySpace> offsets_;
79+ Kokkos::View<LO*, HostMemorySpace> elem_counts_;
80+
81+ ComputeOffsetsFunctor (Kokkos::View<LO*, HostMemorySpace> offsets,
82+ Kokkos::View<LO*, HostMemorySpace> elem_counts)
83+ : offsets_(offsets), elem_counts_(elem_counts)
84+ {
85+ }
86+
87+ KOKKOS_INLINE_FUNCTION
88+ void operator ()(LO i, LO& partial, bool is_final) const
89+ {
90+ if (is_final) {
91+ offsets_ (i) = partial;
92+ }
93+ partial += elem_counts_ (i);
94+ }
95+ };
96+
97+ struct CountPointsPerElementFunctor
98+ {
99+ Kokkos::View<LO*> elem_counts_;
100+ Kokkos::View<GridPointSearch::Result*> search_results_;
101+
102+ CountPointsPerElementFunctor (
103+ Kokkos::View<LO*> elem_counts,
104+ Kokkos::View<GridPointSearch::Result*> search_results)
105+ : elem_counts_(elem_counts), search_results_(search_results)
106+ {
107+ }
108+
109+ KOKKOS_INLINE_FUNCTION
110+ void operator ()(LO i) const
111+ {
112+ auto [dim, elem_idx, coord] = search_results_ (i);
113+ Kokkos::atomic_add (&elem_counts_ (elem_idx), 1 );
114+ }
115+ };
116+
117+ struct FillCoordinatesAndIndicesFunctor
118+ {
119+ Omega_h::Mesh& mesh_;
120+ Kokkos::View<LO*> elem_counts_;
121+ Kokkos::View<LO*> offsets_;
122+ Kokkos::View<Real**> coordinates_;
123+ Kokkos::View<LO*> indices_;
124+ Kokkos::View<GridPointSearch::Result*> search_results_;
125+
126+ FillCoordinatesAndIndicesFunctor (
127+ Omega_h::Mesh& mesh, Kokkos::View<LO*> elem_counts,
128+ Kokkos::View<LO*> offsets, Kokkos::View<Real**> coordinates,
129+ Kokkos::View<LO*> indices,
130+ Kokkos::View<GridPointSearch::Result*> search_results)
131+ : mesh_(mesh),
132+ elem_counts_ (elem_counts),
133+ offsets_(offsets),
134+ coordinates_(coordinates),
135+ indices_(indices),
136+ search_results_(search_results)
137+ {
138+ }
139+
140+ KOKKOS_INLINE_FUNCTION
141+ void operator ()(LO i) const
142+ {
143+ auto [dim, elem_idx, coord] = search_results_ (i);
144+ // currently don't handle case where point is on a boundary
145+ PCMS_ALWAYS_ASSERT (static_cast <int >(dim) == mesh_.dim ());
146+ // element should be inside the domain (positive)
147+ PCMS_ALWAYS_ASSERT (elem_idx >= 0 && elem_idx < mesh_.nelems ());
148+ LO count = Kokkos::atomic_sub_fetch (&elem_counts_ (elem_idx), 1 );
149+ LO index = offsets_ (elem_idx) + count - 1 ;
150+ for (int j = 0 ; j < (mesh_.dim () + 1 ); ++j) {
151+ coordinates_ (index, j) = coord[j];
152+ }
153+ indices_ (index) = i;
154+ }
155+ };
156+
67157struct OmegaHField2LocalizationHint
68158{
69159 OmegaHField2LocalizationHint (
70- Omega_h::Mesh& mesh, Kokkos::View<GridPointSearch::Result*> search_results)
160+ Omega_h::Mesh& mesh,
161+ Kokkos::View<GridPointSearch::Result*, HostMemorySpace> search_results)
71162 : offsets_(" " , mesh.nelems() + 1 ),
72163 coordinates_ (" " , search_results.size(), mesh.dim() + 1),
73164 indices_(" " , search_results.size())
74165 {
75- Kokkos::View<LO*> elem_counts (" " , mesh.nelems ());
166+ Kokkos::View<LO*, HostMemorySpace > elem_counts (" " , mesh.nelems ());
76167
77168 for (size_t i = 0 ; i < search_results.size (); ++i) {
78169 auto [dim, elem_idx, coord] = search_results (i);
79170 elem_counts[elem_idx] += 1 ;
80171 }
81172
82173 LO total;
174+
175+ ComputeOffsetsFunctor functor (offsets_, elem_counts);
83176 Kokkos::parallel_scan (
84- mesh.nelems (),
85- KOKKOS_LAMBDA (LO i, LO & partial, bool is_final) {
86- if (is_final) {
87- offsets_ (i) = partial;
88- }
89- partial += elem_counts (i);
90- },
91- total);
177+ " ComputeOffsets" ,
178+ Kokkos::RangePolicy<HostMemorySpace::execution_space>(0 , mesh.nelems ()),
179+ functor, total);
92180 offsets_ (mesh.nelems ()) = total;
93181
94182 for (size_t i = 0 ; i < search_results.size (); ++i) {
@@ -108,11 +196,11 @@ struct OmegaHField2LocalizationHint
108196 }
109197
110198 // offsets is the number of points in each element
111- Kokkos::View<LO*> offsets_;
199+ Kokkos::View<LO*, HostMemorySpace > offsets_;
112200 // coordinates are the parametric coordinates of each point
113- Kokkos::View<Real**> coordinates_;
201+ Kokkos::View<Real**, HostMemorySpace > coordinates_;
114202 // indices are the index of the original point
115- Kokkos::View<LO*> indices_;
203+ Kokkos::View<LO*, HostMemorySpace > indices_;
116204};
117205
118206/*
@@ -207,13 +295,14 @@ LocalizationHint OmegaHField2::GetLocalizationHint(
207295
208296 auto coordinates = coordinate_view.GetCoordinates ();
209297 Kokkos::View<Real* [2 ]> coords (" coords" , coordinates.size () / 2 );
210- Kokkos::parallel_for (
211- coordinates.size () / 2 , KOKKOS_LAMBDA (LO i) {
212- coords (i, 0 ) = coordinates (i, 0 );
213- coords (i, 1 ) = coordinates (i, 1 );
214- });
298+ auto coordinates_host = Kokkos::View<const Real**, HostMemorySpace>(
299+ coordinates.data_handle (), coordinates.extent (0 ), coordinates.extent (1 ));
300+ deep_copy_mismatch_layouts (coords, coordinates_host);
215301 auto results = search_ (coords);
216- auto hint = std::make_shared<OmegaHField2LocalizationHint>(mesh_, results);
302+ Kokkos::View<GridPointSearch::Result*, HostMemorySpace> results_h (
303+ " results_h" , results.size ());
304+ Kokkos::deep_copy (results_h, results);
305+ auto hint = std::make_shared<OmegaHField2LocalizationHint>(mesh_, results_h);
217306
218307 return LocalizationHint{hint};
219308}
@@ -233,13 +322,21 @@ void OmegaHField2::Evaluate(LocalizationHint location,
233322 OmegaHField2LocalizationHint hint =
234323 *reinterpret_cast <OmegaHField2LocalizationHint*>(location.data .get ());
235324
236- auto eval_results = mesh_field_->evaluate (hint.coordinates_ , hint.offsets_ );
237-
325+ Kokkos::View<Real**> coordinates_d (
326+ " coordinates_d" , hint.coordinates_ .extent (0 ), hint.coordinates_ .extent (1 ));
327+ deep_copy_mismatch_layouts (coordinates_d, hint.coordinates_ );
328+ Kokkos::View<LO*> offsets_d (" offsets_d" , hint.offsets_ .extent (0 ));
329+ Kokkos::deep_copy (offsets_d, hint.offsets_ );
330+ auto eval_results = mesh_field_->evaluate (coordinates_d, offsets_d);
331+ Kokkos::View<Real**, HostMemorySpace> eval_results_h (
332+ " eval_results_h" , eval_results.extent (0 ), eval_results.extent (1 ));
333+ deep_copy_mismatch_layouts (eval_results_h, eval_results);
238334 Rank1View<Real, HostMemorySpace> values = results.GetValues ();
239-
240335 Kokkos::parallel_for (
241- eval_results.size (),
242- KOKKOS_LAMBDA (LO i) { values[hint.indices_ (i)] = eval_results (i, 0 ); });
336+ " CopyEvalResultsToValues" ,
337+ Kokkos::RangePolicy<HostMemorySpace::execution_space>(
338+ 0 , eval_results_h.extent (0 )),
339+ KOKKOS_LAMBDA (LO i) { values[hint.indices_ (i)] = eval_results_h (i, 0 ); });
243340}
244341
245342void OmegaHField2::EvaluateGradient (
@@ -288,7 +385,7 @@ void OmegaHField2::Deserialize(
288385 if (owned[i])
289386 sorted_buffer[i] = buffer[permutation[i]];
290387 }
291- const auto sorted_buffer_d = Omega_h::Read<Real>(sorted_buffer);
292- SetDOFHolderData (pcms::make_const_array_view (sorted_buffer_d ));
388+
389+ SetDOFHolderData (pcms::make_const_array_view (sorted_buffer ));
293390}
294391} // namespace pcms
0 commit comments