Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions src/pcms/adapter/omega_h/omega_h_field2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class MeshFieldBackendImpl : public MeshFieldBackend
Kokkos::deep_copy(data_d, Kokkos::View<const Real*, HostMemorySpace>(
data.data_handle(), data.size()));
Omega_h::parallel_for(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would it make sense to swap out the Omega_h::parallel_for with Kokkos::parallel_for if we are using the Kokkos macros?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense. I think we should be consistent and use only one of them if possible.

mesh_.nents(dim), OMEGA_H_LAMBDA(size_t ent) {
mesh_.nents(dim), KOKKOS_CLASS_LAMBDA(size_t ent) {
for (size_t n = 0; n < num_nodes; ++n) {
for (size_t c = 0; c < num_components; ++c) {
shape_field_(ent, n, c, topo) =
Expand All @@ -52,7 +52,7 @@ class MeshFieldBackendImpl : public MeshFieldBackend
Kokkos::View<Real*, DefaultExecutionSpace::memory_space> data_d(
"data_d", data.size());
Omega_h::parallel_for(
mesh_.nents(dim), OMEGA_H_LAMBDA(size_t ent) {
mesh_.nents(dim), KOKKOS_CLASS_LAMBDA(size_t ent) {
for (size_t n = 0; n < num_nodes; ++n) {
for (size_t c = 0; c < num_components; ++c) {
data_d[ent * stride + n * num_components + c] =
Expand Down Expand Up @@ -122,6 +122,7 @@ struct FillCoordinatesAndIndicesFunctor
Kokkos::View<Real**> coordinates_;
Kokkos::View<LO*> indices_;
Kokkos::View<GridPointSearch::Result*> search_results_;
Omega_h::Int dim_;

FillCoordinatesAndIndicesFunctor(
Omega_h::Mesh& mesh, Kokkos::View<LO*> elem_counts,
Expand All @@ -133,21 +134,23 @@ struct FillCoordinatesAndIndicesFunctor
offsets_(offsets),
coordinates_(coordinates),
indices_(indices),
search_results_(search_results)
search_results_(search_results),
dim_(mesh.dim())
{
}

KOKKOS_INLINE_FUNCTION
void operator()(LO i) const
{
auto [dim, elem_idx, coord] = search_results_(i);
// disable the host assertion macro for device code
// currently don't handle case where point is on a boundary
PCMS_ALWAYS_ASSERT(static_cast<int>(dim) == mesh_.dim());
// PCMS_ALWAYS_ASSERT(static_cast<int>(dim) == mesh_.dim());
// element should be inside the domain (positive)
PCMS_ALWAYS_ASSERT(elem_idx >= 0 && elem_idx < mesh_.nelems());
// PCMS_ALWAYS_ASSERT(elem_idx >= 0 && elem_idx < mesh_.nelems());
LO count = Kokkos::atomic_sub_fetch(&elem_counts_(elem_idx), 1);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we define a Host macro that only evaluates on host?

LO index = offsets_(elem_idx) + count - 1;
for (int j = 0; j < (mesh_.dim() + 1); ++j) {
for (int j = 0; j < (dim_ + 1); ++j) {
coordinates_(index, j) = coord[j];
}
indices_(index) = i;
Expand Down
7 changes: 4 additions & 3 deletions src/pcms/adapter/point_cloud/point_cloud.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ void PointCloud::SetDOFHolderData(Rank1View<const Real, HostMemorySpace> data)
PCMS_ALWAYS_ASSERT(data.size() == data_.size());
Kokkos::parallel_for(
Kokkos::RangePolicy<pcms::HostMemorySpace::execution_space>(0, data.size()),
KOKKOS_LAMBDA(int i) { data_host_(i) = data[i]; });
KOKKOS_CLASS_LAMBDA(int i) { data_host_(i) = data[i]; });
Kokkos::deep_copy(data_, data_host_);
}

Expand Down Expand Up @@ -100,7 +100,7 @@ int PointCloud::Serialize(
if (buffer.size() > 0) {
Kokkos::parallel_for(
data_.size(),
KOKKOS_LAMBDA(int i) { buffer[permutation[i]] = data_(i); });
KOKKOS_CLASS_LAMBDA(int i) { buffer[permutation[i]] = data_(i); });
}
return data_.size();
}
Expand All @@ -111,7 +111,8 @@ void PointCloud::Deserialize(
{
PCMS_FUNCTION_TIMER;
Kokkos::parallel_for(
data_.size(), KOKKOS_LAMBDA(int i) { data_(i) = buffer[permutation[i]]; });
data_.size(),
KOKKOS_CLASS_LAMBDA(int i) { data_(i) = buffer[permutation[i]]; });
}

} // namespace pcms
1 change: 0 additions & 1 deletion src/pcms/field_layout_communicator.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ redev::LOs ConstructPermutation(GlobalIDView<HostMemorySpace> local_gids,
break;
}

LO local_index = 0;
redev::LOs permutation;
permutation.reserve(local_gids.size());
for (int e = 0; e < ent_offsets.size() - 1; ++e) {
Expand Down
12 changes: 6 additions & 6 deletions test/test_field_communication.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ void client1(MPI_Comm comm, Omega_h::Mesh& mesh, std::string comm_name,
const auto n = layout->GetNumOwnedDofHolder();
Omega_h::HostWrite<Real> ids(n);
PCMS_ALWAYS_ASSERT(n == gids.size());
Kokkos::parallel_for(
"id gid", Kokkos::RangePolicy<pcms::HostMemorySpace::execution_space>(0, n),
KOKKOS_LAMBDA(int i) { ids[i] = gids[i]; });
for (size_t i = 0; i < static_cast<size_t>(n); ++i) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since we know this is on the host, use std::copy

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would use OpenMP threads for host parallel if available. What is the specific warning that is being resolved with this? Basicly I want to decide if it makes sense to swich this to a host for loop or stay with kokkos host parallel_for.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar warnings about calling host functions from device functions.

ids[i] = gids[i];
}

auto field = layout->CreateField();
field->SetDOFHolderData(pcms::make_const_array_view(ids));
Expand Down Expand Up @@ -157,9 +157,9 @@ void server(MPI_Comm comm, Omega_h::Mesh& mesh, std::string comm_name,
pcms::CoordinateSystem::Cartesian);
const auto n = layout->GetNumOwnedDofHolder();
Omega_h::HostWrite<Real> ids(n);
Kokkos::parallel_for(
"id 0", Kokkos::RangePolicy<pcms::HostMemorySpace::execution_space>(0, n),
KOKKOS_LAMBDA(int i) { ids[i] = 0; });
for (size_t i = 0; i < static_cast<size_t>(n); ++i) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since we know this is on the host use std::fill

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would use OpenMP threads for host parallel if available. What is the specific warning that is being resolved with this? Basicly I want to decide if it makes sense to swich this to a host for loop or stay with kokkos host parallel_for.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The warning is:

warning #20011-D: calling a __host__ function("Omega_h::HostWrite<double> ::operator [](int) const") from a __host__ __device__ function("server(int,  ::Omega_h::Mesh &,    ::std::__cxx11::basic_string<char,     ::std::char_traits<char> , ::std::allocator<char> > , int, const     ::std::map<   ::std::__cxx11::basic_string<char,     ::std::char_traits<char> , ::std::allocator<char> > ,    ::std::__cxx11::basic_string<char,     ::std::char_traits<char> , ::std::allocator<char> > ,     ::std::less<   ::std::__cxx11::basic_string<char,     ::std::char_traits<char> , ::std::allocator<char> > > , ::std::allocator<    ::std::pair<const    ::std::__cxx11::basic_string<char,     ::std::char_traits<char> , ::std::allocator<char> > ,    ::std::__cxx11::basic_string<char,     ::std::char_traits<char> , ::std::allocator<char> > > > >  &, const    ::std::__cxx11::basic_string<char,     ::std::char_traits<char> , ::std::allocator<char> >  &)::[lambda(int) (instance 1)]::operator () const") is not allowed

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see...since it's in the HostSpace using KOKKOS_LAMBDA is marking that as __host__ __device__. Probably just using [=](int i){} for the lambda will resolve this. Alternatively there may be a KOKKOS_HOST_LAMBDA?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. I'll update accordingly.

ids[i] = 0;
}

auto field = layout->CreateField();
pcms::FieldLayoutCommunicator<pcms::Real> layout_comm1(
Expand Down
17 changes: 8 additions & 9 deletions test/test_field_copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ void test_copy(Omega_h::CommPtr world, int dim, int order, int num_components)
pcms::CoordinateSystem::Cartesian);
int ndata = layout->GetNumOwnedDofHolder() * num_components;
Omega_h::HostWrite<Real> ids(ndata);
Kokkos::parallel_for(
Kokkos::RangePolicy<pcms::HostMemorySpace::execution_space>(0, ndata),
KOKKOS_LAMBDA(int i) { ids[i] = i; });
for (size_t i = 0; i < static_cast<size_t>(ndata); ++i) {
ids[i] = i;
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use std::iota

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would use OpenMP threads for host parallel if available. What is the specific warning that is being resolved with this? Basicly I want to decide if it makes sense to swich this to a host for loop or stay with kokkos host parallel_for.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The warning is:

warning #20011-D: calling a __host__ function("Omega_h::HostWrite<double> ::operator [](int) const") from a __host__ __device__ function("test_copy(    ::std::shared_ptr< ::Omega_h::Comm> , int, int, int)::[lambda(int) (instance 1)]::operator () const") is not allowed


auto original = layout->CreateField();
original->SetDOFHolderData(pcms::make_const_array_view(ids));
Expand All @@ -36,12 +36,11 @@ void test_copy(Omega_h::CommPtr world, int dim, int order, int num_components)

REQUIRE(copied_array.size() == ndata);
int sum = 0;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<pcms::HostMemorySpace::execution_space>(0, ndata),
KOKKOS_LAMBDA(int i, int& local_sum) {
local_sum += std::abs(ids[i] - copied_array[i]) < 1e-12;
},
sum);
for (size_t i = 0; i < static_cast<size_t>(ndata); ++i) {
if (std::abs(ids[i] - copied_array[i]) < 1e-12) {
sum++;
}
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would use OpenMP threads for host parallel if available. What is the specific warning that is being resolved with this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar warnings about calling host functions from device functions.

REQUIRE(sum == ndata);
}

Expand Down
2 changes: 1 addition & 1 deletion test/test_interpolation_on_ltx_mesh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ TEST_CASE("Test Interpolation on LTX Mesh", "[interpolation]")
output_xgc_vtu_filename.c_str());

// ------------------ Verification ------------------ //
double tol = 10.0 / 100.0; // 10 percent tolerance
// double tol = 10.0 / 100.0; // 10 percent tolerance
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is unused, just delete it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is intended to be used with the commented-out verification code. Would you like me to remove that entire block?

 // ------------------ Verification ------------------ //
  double tol = 10.0 / 100.0; // 10 percent tolerance
  // XGC Originated Values
  /*
  for (int i = 0; i < xgc_num_nodes; ++i) {
    CHECK_THAT(interpolated_back_density_at_xgc_nodes[i],
               Catch::Matchers::WithinRel(density_at_xgc_nodes[i], tol) ||
                 Catch::Matchers::WithinAbs(density_at_xgc_nodes[i], tol));
    CHECK_THAT(interpolated_back_temp_at_xgc_nodes[i],
               Catch::Matchers::WithinRel(temp_at_xgc_nodes[i], tol) ||
                 Catch::Matchers::WithinAbs(temp_at_xgc_nodes[i], tol));
  }
  */

// XGC Originated Values
/*
for (int i = 0; i < xgc_num_nodes; ++i) {
Expand Down