Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 133 additions & 27 deletions src/pcms/adapter/omega_h/omega_h_field2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,16 @@ class MeshFieldBackendImpl : public MeshFieldBackend
{
size_t stride = num_nodes * num_components;
auto topo = static_cast<MeshField::Mesh_Topology>(dim);
Kokkos::View<Real*, DefaultExecutionSpace::memory_space> data_d(
"data_d", data.size());
Kokkos::deep_copy(data_d, Kokkos::View<const Real*, HostMemorySpace>(
data.data_handle(), data.size()));
Omega_h::parallel_for(
mesh_.nents(dim), OMEGA_H_LAMBDA(size_t ent) {
for (size_t n = 0; n < num_nodes; ++n) {
for (size_t c = 0; c < num_components; ++c) {
shape_field_(ent, n, c, topo) =
data[ent * stride + n * num_components + c];
data_d[ent * stride + n * num_components + c];
}
}
});
Expand All @@ -45,15 +49,20 @@ class MeshFieldBackendImpl : public MeshFieldBackend
{
size_t stride = num_nodes * num_components;
auto topo = static_cast<MeshField::Mesh_Topology>(dim);
Kokkos::View<Real*, DefaultExecutionSpace::memory_space> data_d(
"data_d", data.size());
Omega_h::parallel_for(
mesh_.nents(dim), OMEGA_H_LAMBDA(size_t ent) {
for (size_t n = 0; n < num_nodes; ++n) {
for (size_t c = 0; c < num_components; ++c) {
data[ent * stride + n * num_components + c] =
data_d[ent * stride + n * num_components + c] =
shape_field_(ent, n, c, topo);
}
}
});
Kokkos::deep_copy(
Kokkos::View<Real*, HostMemorySpace>(data.data_handle(), data.size()),
data_d);
}

private:
Expand All @@ -64,31 +73,110 @@ class MeshFieldBackendImpl : public MeshFieldBackend
ShapeField shape_field_;
};

struct ComputeOffsetsFunctor
{
Kokkos::View<LO*, HostMemorySpace> offsets_;
Kokkos::View<LO*, HostMemorySpace> elem_counts_;

ComputeOffsetsFunctor(Kokkos::View<LO*, HostMemorySpace> offsets,
Kokkos::View<LO*, HostMemorySpace> elem_counts)
: offsets_(offsets), elem_counts_(elem_counts)
{
}

KOKKOS_INLINE_FUNCTION
void operator()(LO i, LO& partial, bool is_final) const
{
if (is_final) {
offsets_(i) = partial;
}
partial += elem_counts_(i);
}
};

struct CountPointsPerElementFunctor
{
Kokkos::View<LO*> elem_counts_;
Kokkos::View<GridPointSearch::Result*> search_results_;

CountPointsPerElementFunctor(
Kokkos::View<LO*> elem_counts,
Kokkos::View<GridPointSearch::Result*> search_results)
: elem_counts_(elem_counts), search_results_(search_results)
{
}

KOKKOS_INLINE_FUNCTION
void operator()(LO i) const
{
auto [dim, elem_idx, coord] = search_results_(i);
Kokkos::atomic_add(&elem_counts_(elem_idx), 1);
}
};

struct FillCoordinatesAndIndicesFunctor
{
Omega_h::Mesh& mesh_;
Kokkos::View<LO*> elem_counts_;
Kokkos::View<LO*> offsets_;
Kokkos::View<Real**> coordinates_;
Kokkos::View<LO*> indices_;
Kokkos::View<GridPointSearch::Result*> search_results_;

FillCoordinatesAndIndicesFunctor(
Omega_h::Mesh& mesh, Kokkos::View<LO*> elem_counts,
Kokkos::View<LO*> offsets, Kokkos::View<Real**> coordinates,
Kokkos::View<LO*> indices,
Kokkos::View<GridPointSearch::Result*> search_results)
: mesh_(mesh),
elem_counts_(elem_counts),
offsets_(offsets),
coordinates_(coordinates),
indices_(indices),
search_results_(search_results)
{
}

KOKKOS_INLINE_FUNCTION
void operator()(LO i) const
{
auto [dim, elem_idx, coord] = search_results_(i);
// currently don't handle case where point is on a boundary
PCMS_ALWAYS_ASSERT(static_cast<int>(dim) == mesh_.dim());
// element should be inside the domain (positive)
PCMS_ALWAYS_ASSERT(elem_idx >= 0 && elem_idx < mesh_.nelems());
LO count = Kokkos::atomic_sub_fetch(&elem_counts_(elem_idx), 1);
LO index = offsets_(elem_idx) + count - 1;
for (int j = 0; j < (mesh_.dim() + 1); ++j) {
coordinates_(index, j) = coord[j];
}
indices_(index) = i;
}
};

struct OmegaHField2LocalizationHint
{
OmegaHField2LocalizationHint(
Omega_h::Mesh& mesh, Kokkos::View<GridPointSearch::Result*> search_results)
Omega_h::Mesh& mesh,
Kokkos::View<GridPointSearch::Result*, HostMemorySpace> search_results)
: offsets_("", mesh.nelems() + 1),
coordinates_("", search_results.size(), mesh.dim() + 1),
indices_("", search_results.size())
{
Kokkos::View<LO*> elem_counts("", mesh.nelems());
Kokkos::View<LO*, HostMemorySpace> elem_counts("", mesh.nelems());

for (size_t i = 0; i < search_results.size(); ++i) {
auto [dim, elem_idx, coord] = search_results(i);
elem_counts[elem_idx] += 1;
}

LO total;

ComputeOffsetsFunctor functor(offsets_, elem_counts);
Kokkos::parallel_scan(
mesh.nelems(),
KOKKOS_LAMBDA(LO i, LO & partial, bool is_final) {
if (is_final) {
offsets_(i) = partial;
}
partial += elem_counts(i);
},
total);
"ComputeOffsets",
Kokkos::RangePolicy<HostMemorySpace::execution_space>(0, mesh.nelems()),
functor, total);
offsets_(mesh.nelems()) = total;

for (size_t i = 0; i < search_results.size(); ++i) {
Expand All @@ -108,11 +196,11 @@ struct OmegaHField2LocalizationHint
}

// offsets is the number of points in each element
Kokkos::View<LO*> offsets_;
Kokkos::View<LO*, HostMemorySpace> offsets_;
// coordinates are the parametric coordinates of each point
Kokkos::View<Real**> coordinates_;
Kokkos::View<Real**, HostMemorySpace> coordinates_;
// indices are the index of the original point
Kokkos::View<LO*> indices_;
Kokkos::View<LO*, HostMemorySpace> indices_;
};

/*
Expand Down Expand Up @@ -207,13 +295,17 @@ LocalizationHint OmegaHField2::GetLocalizationHint(

auto coordinates = coordinate_view.GetCoordinates();
Kokkos::View<Real* [2]> coords("coords", coordinates.size() / 2);
Kokkos::parallel_for(
coordinates.size() / 2, KOKKOS_LAMBDA(LO i) {
coords(i, 0) = coordinates(i, 0);
coords(i, 1) = coordinates(i, 1);
});
auto coordinates_host_tmp = Kokkos::create_mirror_view(coords);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these arrays should probably be stored at the class level so we aren't creating them lots of times, but we can leave that to a future optimization pass.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Created a issue including this optimization #233

Kokkos::deep_copy(
coordinates_host_tmp,
Kokkos::View<const Real**, HostMemorySpace>(
coordinates.data_handle(), coordinates.extent(0), coordinates.extent(1)));
Kokkos::deep_copy(coords, coordinates_host_tmp);
auto results = search_(coords);
auto hint = std::make_shared<OmegaHField2LocalizationHint>(mesh_, results);
Kokkos::View<GridPointSearch::Result*, HostMemorySpace> results_h(
"results_h", results.size());
Kokkos::deep_copy(results_h, results);
auto hint = std::make_shared<OmegaHField2LocalizationHint>(mesh_, results_h);

return LocalizationHint{hint};
}
Expand All @@ -233,13 +325,27 @@ void OmegaHField2::Evaluate(LocalizationHint location,
OmegaHField2LocalizationHint hint =
*reinterpret_cast<OmegaHField2LocalizationHint*>(location.data.get());

auto eval_results = mesh_field_->evaluate(hint.coordinates_, hint.offsets_);
Kokkos::View<Real**> coordinates_d(
"coordinates_d", hint.coordinates_.extent(0), hint.coordinates_.extent(1));
auto coordinates_host_tmp = Kokkos::create_mirror_view(coordinates_d);
Kokkos::deep_copy(coordinates_host_tmp, hint.coordinates_);
Kokkos::deep_copy(coordinates_d, coordinates_host_tmp);
Copy link

Copilot AI Nov 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Redundant copy operations. Lines 340-342 create a mirror view, copy from hint.coordinates_ (HostMemorySpace) to the mirror, then copy to device. This is unnecessary - you can directly deep_copy from host to device. Replace lines 338-342 with: Kokkos::View<Real**> coordinates_d("coordinates_d", hint.coordinates_.extent(0), hint.coordinates_.extent(1)); Kokkos::deep_copy(coordinates_d, hint.coordinates_);

Suggested change
auto coordinates_host_tmp = Kokkos::create_mirror_view(coordinates_d);
Kokkos::deep_copy(coordinates_host_tmp, hint.coordinates_);
Kokkos::deep_copy(coordinates_d, coordinates_host_tmp);
Kokkos::deep_copy(coordinates_d, hint.coordinates_);

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Direct 2D deep_copy fails due to layout differences between views. The extra copy here follows the workaround from the Kokkos documentation for handling layout mismatches.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs a comment in place to explain what you are doing and link to the kokkos documentation.

Kokkos::View<LO*> offsets_d("offsets_d", hint.offsets_.extent(0));
Kokkos::deep_copy(offsets_d, hint.offsets_);
auto eval_results = mesh_field_->evaluate(coordinates_d, offsets_d);

auto eval_results_tmp =
Kokkos::create_mirror_view(Kokkos::HostSpace(), eval_results);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally we can move the construction of these views to be a one-time operation based on the field layout which will tell us how big the arrays will need to be.

Kokkos::deep_copy(eval_results_tmp, eval_results);
Kokkos::View<Real**, HostMemorySpace> eval_results_h(
"eval_results_h", eval_results_tmp.extent(0), eval_results_tmp.extent(1));
Kokkos::deep_copy(eval_results_h, eval_results_tmp);
Copy link

Copilot AI Nov 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Redundant copy operations. Lines 347-352 create a mirror, copy from device to mirror, create another host view, then copy mirror to host view. This is unnecessary - you can directly use the mirror view. Replace lines 347-352 with: auto eval_results_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), eval_results);

Suggested change
auto eval_results_tmp =
Kokkos::create_mirror_view(Kokkos::HostSpace(), eval_results);
Kokkos::deep_copy(eval_results_tmp, eval_results);
Kokkos::View<Real**, HostMemorySpace> eval_results_h(
"eval_results_h", eval_results_tmp.extent(0), eval_results_tmp.extent(1));
Kokkos::deep_copy(eval_results_h, eval_results_tmp);
auto eval_results_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), eval_results);

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue regarding layout incompatible views.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is a repeating pattern, let's consider making a function called deep_copy_mismatch_layouts (or something along these lines). We can then put the documentation with the function. I think that will make this code much more obvious.

Rank1View<Real, HostMemorySpace> values = results.GetValues();

Kokkos::parallel_for(
eval_results.size(),
KOKKOS_LAMBDA(LO i) { values[hint.indices_(i)] = eval_results(i, 0); });
"CopyEvalResultsToValues",
Kokkos::RangePolicy<HostMemorySpace::execution_space>(
0, eval_results_h.extent(0)),
KOKKOS_LAMBDA(LO i) { values[hint.indices_(i)] = eval_results_h(i, 0); });
}

void OmegaHField2::EvaluateGradient(
Expand Down Expand Up @@ -288,7 +394,7 @@ void OmegaHField2::Deserialize(
if (owned[i])
sorted_buffer[i] = buffer[permutation[i]];
}
const auto sorted_buffer_d = Omega_h::Read<Real>(sorted_buffer);
SetDOFHolderData(pcms::make_const_array_view(sorted_buffer_d));

SetDOFHolderData(pcms::make_const_array_view(sorted_buffer));
}
} // namespace pcms
2 changes: 1 addition & 1 deletion src/pcms/adapter/omega_h/omega_h_field2.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class OmegaHField2 : public FieldT<Real>
Omega_h::Mesh& mesh_;
std::unique_ptr<MeshFieldBackend> mesh_field_;
GridPointSearch search_;
Kokkos::View<Real*> dof_holder_data_;
Kokkos::View<Real*, HostMemorySpace> dof_holder_data_;
};

} // namespace pcms
Expand Down
Loading