Skip to content

Commit c6791b3

Browse files
authored
Merge pull request #227 from Sichao25/yus/debug_gpu_build
Debug simplified field compilation and runtime issues
2 parents 9d02cf2 + 33258d6 commit c6791b3

15 files changed

+392
-114
lines changed

src/pcms/adapter/omega_h/omega_h_field2.cpp

Lines changed: 125 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,16 @@ class MeshFieldBackendImpl : public MeshFieldBackend
2929
{
3030
size_t stride = num_nodes * num_components;
3131
auto topo = static_cast<MeshField::Mesh_Topology>(dim);
32+
Kokkos::View<Real*, DefaultExecutionSpace::memory_space> data_d(
33+
"data_d", data.size());
34+
Kokkos::deep_copy(data_d, Kokkos::View<const Real*, HostMemorySpace>(
35+
data.data_handle(), data.size()));
3236
Omega_h::parallel_for(
3337
mesh_.nents(dim), OMEGA_H_LAMBDA(size_t ent) {
3438
for (size_t n = 0; n < num_nodes; ++n) {
3539
for (size_t c = 0; c < num_components; ++c) {
3640
shape_field_(ent, n, c, topo) =
37-
data[ent * stride + n * num_components + c];
41+
data_d[ent * stride + n * num_components + c];
3842
}
3943
}
4044
});
@@ -45,15 +49,20 @@ class MeshFieldBackendImpl : public MeshFieldBackend
4549
{
4650
size_t stride = num_nodes * num_components;
4751
auto topo = static_cast<MeshField::Mesh_Topology>(dim);
52+
Kokkos::View<Real*, DefaultExecutionSpace::memory_space> data_d(
53+
"data_d", data.size());
4854
Omega_h::parallel_for(
4955
mesh_.nents(dim), OMEGA_H_LAMBDA(size_t ent) {
5056
for (size_t n = 0; n < num_nodes; ++n) {
5157
for (size_t c = 0; c < num_components; ++c) {
52-
data[ent * stride + n * num_components + c] =
58+
data_d[ent * stride + n * num_components + c] =
5359
shape_field_(ent, n, c, topo);
5460
}
5561
}
5662
});
63+
Kokkos::deep_copy(
64+
Kokkos::View<Real*, HostMemorySpace>(data.data_handle(), data.size()),
65+
data_d);
5766
}
5867

5968
private:
@@ -64,31 +73,110 @@ class MeshFieldBackendImpl : public MeshFieldBackend
6473
ShapeField shape_field_;
6574
};
6675

76+
struct ComputeOffsetsFunctor
77+
{
78+
Kokkos::View<LO*, HostMemorySpace> offsets_;
79+
Kokkos::View<LO*, HostMemorySpace> elem_counts_;
80+
81+
ComputeOffsetsFunctor(Kokkos::View<LO*, HostMemorySpace> offsets,
82+
Kokkos::View<LO*, HostMemorySpace> elem_counts)
83+
: offsets_(offsets), elem_counts_(elem_counts)
84+
{
85+
}
86+
87+
KOKKOS_INLINE_FUNCTION
88+
void operator()(LO i, LO& partial, bool is_final) const
89+
{
90+
if (is_final) {
91+
offsets_(i) = partial;
92+
}
93+
partial += elem_counts_(i);
94+
}
95+
};
96+
97+
struct CountPointsPerElementFunctor
98+
{
99+
Kokkos::View<LO*> elem_counts_;
100+
Kokkos::View<GridPointSearch::Result*> search_results_;
101+
102+
CountPointsPerElementFunctor(
103+
Kokkos::View<LO*> elem_counts,
104+
Kokkos::View<GridPointSearch::Result*> search_results)
105+
: elem_counts_(elem_counts), search_results_(search_results)
106+
{
107+
}
108+
109+
KOKKOS_INLINE_FUNCTION
110+
void operator()(LO i) const
111+
{
112+
auto [dim, elem_idx, coord] = search_results_(i);
113+
Kokkos::atomic_add(&elem_counts_(elem_idx), 1);
114+
}
115+
};
116+
117+
struct FillCoordinatesAndIndicesFunctor
118+
{
119+
Omega_h::Mesh& mesh_;
120+
Kokkos::View<LO*> elem_counts_;
121+
Kokkos::View<LO*> offsets_;
122+
Kokkos::View<Real**> coordinates_;
123+
Kokkos::View<LO*> indices_;
124+
Kokkos::View<GridPointSearch::Result*> search_results_;
125+
126+
FillCoordinatesAndIndicesFunctor(
127+
Omega_h::Mesh& mesh, Kokkos::View<LO*> elem_counts,
128+
Kokkos::View<LO*> offsets, Kokkos::View<Real**> coordinates,
129+
Kokkos::View<LO*> indices,
130+
Kokkos::View<GridPointSearch::Result*> search_results)
131+
: mesh_(mesh),
132+
elem_counts_(elem_counts),
133+
offsets_(offsets),
134+
coordinates_(coordinates),
135+
indices_(indices),
136+
search_results_(search_results)
137+
{
138+
}
139+
140+
KOKKOS_INLINE_FUNCTION
141+
void operator()(LO i) const
142+
{
143+
auto [dim, elem_idx, coord] = search_results_(i);
144+
// currently don't handle case where point is on a boundary
145+
PCMS_ALWAYS_ASSERT(static_cast<int>(dim) == mesh_.dim());
146+
// element should be inside the domain (positive)
147+
PCMS_ALWAYS_ASSERT(elem_idx >= 0 && elem_idx < mesh_.nelems());
148+
LO count = Kokkos::atomic_sub_fetch(&elem_counts_(elem_idx), 1);
149+
LO index = offsets_(elem_idx) + count - 1;
150+
for (int j = 0; j < (mesh_.dim() + 1); ++j) {
151+
coordinates_(index, j) = coord[j];
152+
}
153+
indices_(index) = i;
154+
}
155+
};
156+
67157
struct OmegaHField2LocalizationHint
68158
{
69159
OmegaHField2LocalizationHint(
70-
Omega_h::Mesh& mesh, Kokkos::View<GridPointSearch::Result*> search_results)
160+
Omega_h::Mesh& mesh,
161+
Kokkos::View<GridPointSearch::Result*, HostMemorySpace> search_results)
71162
: offsets_("", mesh.nelems() + 1),
72163
coordinates_("", search_results.size(), mesh.dim() + 1),
73164
indices_("", search_results.size())
74165
{
75-
Kokkos::View<LO*> elem_counts("", mesh.nelems());
166+
Kokkos::View<LO*, HostMemorySpace> elem_counts("", mesh.nelems());
76167

77168
for (size_t i = 0; i < search_results.size(); ++i) {
78169
auto [dim, elem_idx, coord] = search_results(i);
79170
elem_counts[elem_idx] += 1;
80171
}
81172

82173
LO total;
174+
175+
ComputeOffsetsFunctor functor(offsets_, elem_counts);
83176
Kokkos::parallel_scan(
84-
mesh.nelems(),
85-
KOKKOS_LAMBDA(LO i, LO & partial, bool is_final) {
86-
if (is_final) {
87-
offsets_(i) = partial;
88-
}
89-
partial += elem_counts(i);
90-
},
91-
total);
177+
"ComputeOffsets",
178+
Kokkos::RangePolicy<HostMemorySpace::execution_space>(0, mesh.nelems()),
179+
functor, total);
92180
offsets_(mesh.nelems()) = total;
93181

94182
for (size_t i = 0; i < search_results.size(); ++i) {
@@ -108,11 +196,11 @@ struct OmegaHField2LocalizationHint
108196
}
109197

110198
// offsets is the number of points in each element
111-
Kokkos::View<LO*> offsets_;
199+
Kokkos::View<LO*, HostMemorySpace> offsets_;
112200
// coordinates are the parametric coordinates of each point
113-
Kokkos::View<Real**> coordinates_;
201+
Kokkos::View<Real**, HostMemorySpace> coordinates_;
114202
// indices are the index of the original point
115-
Kokkos::View<LO*> indices_;
203+
Kokkos::View<LO*, HostMemorySpace> indices_;
116204
};
117205

118206
/*
@@ -207,13 +295,14 @@ LocalizationHint OmegaHField2::GetLocalizationHint(
207295

208296
auto coordinates = coordinate_view.GetCoordinates();
209297
Kokkos::View<Real* [2]> coords("coords", coordinates.size() / 2);
210-
Kokkos::parallel_for(
211-
coordinates.size() / 2, KOKKOS_LAMBDA(LO i) {
212-
coords(i, 0) = coordinates(i, 0);
213-
coords(i, 1) = coordinates(i, 1);
214-
});
298+
auto coordinates_host = Kokkos::View<const Real**, HostMemorySpace>(
299+
coordinates.data_handle(), coordinates.extent(0), coordinates.extent(1));
300+
deep_copy_mismatch_layouts(coords, coordinates_host);
215301
auto results = search_(coords);
216-
auto hint = std::make_shared<OmegaHField2LocalizationHint>(mesh_, results);
302+
Kokkos::View<GridPointSearch::Result*, HostMemorySpace> results_h(
303+
"results_h", results.size());
304+
Kokkos::deep_copy(results_h, results);
305+
auto hint = std::make_shared<OmegaHField2LocalizationHint>(mesh_, results_h);
217306

218307
return LocalizationHint{hint};
219308
}
@@ -233,13 +322,21 @@ void OmegaHField2::Evaluate(LocalizationHint location,
233322
OmegaHField2LocalizationHint hint =
234323
*reinterpret_cast<OmegaHField2LocalizationHint*>(location.data.get());
235324

236-
auto eval_results = mesh_field_->evaluate(hint.coordinates_, hint.offsets_);
237-
325+
Kokkos::View<Real**> coordinates_d(
326+
"coordinates_d", hint.coordinates_.extent(0), hint.coordinates_.extent(1));
327+
deep_copy_mismatch_layouts(coordinates_d, hint.coordinates_);
328+
Kokkos::View<LO*> offsets_d("offsets_d", hint.offsets_.extent(0));
329+
Kokkos::deep_copy(offsets_d, hint.offsets_);
330+
auto eval_results = mesh_field_->evaluate(coordinates_d, offsets_d);
331+
Kokkos::View<Real**, HostMemorySpace> eval_results_h(
332+
"eval_results_h", eval_results.extent(0), eval_results.extent(1));
333+
deep_copy_mismatch_layouts(eval_results_h, eval_results);
238334
Rank1View<Real, HostMemorySpace> values = results.GetValues();
239-
240335
Kokkos::parallel_for(
241-
eval_results.size(),
242-
KOKKOS_LAMBDA(LO i) { values[hint.indices_(i)] = eval_results(i, 0); });
336+
"CopyEvalResultsToValues",
337+
Kokkos::RangePolicy<HostMemorySpace::execution_space>(
338+
0, eval_results_h.extent(0)),
339+
KOKKOS_LAMBDA(LO i) { values[hint.indices_(i)] = eval_results_h(i, 0); });
243340
}
244341

245342
void OmegaHField2::EvaluateGradient(
@@ -288,7 +385,7 @@ void OmegaHField2::Deserialize(
288385
if (owned[i])
289386
sorted_buffer[i] = buffer[permutation[i]];
290387
}
291-
const auto sorted_buffer_d = Omega_h::Read<Real>(sorted_buffer);
292-
SetDOFHolderData(pcms::make_const_array_view(sorted_buffer_d));
388+
389+
SetDOFHolderData(pcms::make_const_array_view(sorted_buffer));
293390
}
294391
} // namespace pcms

src/pcms/adapter/omega_h/omega_h_field2.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ class OmegaHField2 : public FieldT<Real>
6161
Omega_h::Mesh& mesh_;
6262
std::unique_ptr<MeshFieldBackend> mesh_field_;
6363
GridPointSearch search_;
64-
Kokkos::View<Real*> dof_holder_data_;
64+
Kokkos::View<Real*, HostMemorySpace> dof_holder_data_;
6565
};
6666

6767
} // namespace pcms

0 commit comments

Comments
 (0)