Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 172 additions & 0 deletions include/deal.II/multigrid/mg_transfer_matrix_free.h
Original file line number Diff line number Diff line change
Expand Up @@ -1745,6 +1745,178 @@ MGTransferMatrixFree<dim, Number, MemorySpace>::interpolate_to_mg(
}


/**
 * Template specialization of MGTransferMatrixFree for device vectors
 * (MemorySpace::Default).
 *
 * Currently works by transferring device vectors back to the host and
 * performing the transfer operation on the host. Eventually this should be
 * replaced by all operations occurring on the device.
 */
template <int dim, typename Number>
class MGTransferMatrixFree<dim, Number, MemorySpace::Default>
  : public MGTransferBase<
      LinearAlgebra::distributed::Vector<Number, MemorySpace::Default>>
{
public:
  /// Vector type this transfer operates on (device memory).
  using VectorType =
    LinearAlgebra::distributed::Vector<Number, MemorySpace::Default>;

  /// Host-side counterpart used internally for the round trip.
  using VectorTypeHost =
    LinearAlgebra::distributed::Vector<Number, dealii::MemorySpace::Host>;

  /**
   * Default constructor.
   */
  MGTransferMatrixFree()
    : transfer()
  {}

  /**
   * Constructor taking the constrained degrees of freedom on each level.
   */
  MGTransferMatrixFree(const MGConstrainedDoFs &mg_constrained_dofs)
    : transfer(mg_constrained_dofs)
  {}

  /**
   * Constructor taking pre-built two-level transfer objects.
   *
   * NOTE(review): both arguments are host objects because all work is
   * delegated to the internal host transfer. If the implementation later
   * moves to the device, this signature (and user code) would need to
   * change accordingly.
   */
  MGTransferMatrixFree(
    const MGLevelObject<MGTwoLevelTransfer<dim, VectorTypeHost>> &mg_transfers,
    const std::function<void(const unsigned int, VectorTypeHost &)>
      &initialize_dof_vector)
    : transfer(mg_transfers, initialize_dof_vector)
  {}

  /**
   * Build the transfer using externally provided level partitioners.
   */
  void
  build(const std::vector<std::shared_ptr<const Utilities::MPI::Partitioner>>
          &external_partitioners = {})
  {
    transfer.build(external_partitioners);
  }

  /**
   * Build the transfer using a callback that initializes a level vector.
   */
  void
  build(const std::function<void(const unsigned int, VectorType &)>
          &initialize_dof_vector)
  {
    transfer.build(initialize_dof_vector);
  }

  /**
   * Build the transfer for the levels of the given DoFHandler, optionally
   * reusing externally provided level partitioners.
   */
  void
  build(const DoFHandler<dim> &dof_handler,
        const std::vector<std::shared_ptr<const Utilities::MPI::Partitioner>>
          &external_partitioners = {})
  {
    transfer.build(dof_handler, external_partitioners);
  }

  /**
   * Build the transfer for the levels of the given DoFHandler using a
   * callback that initializes a level vector.
   */
  void
  build(const DoFHandler<dim> &dof_handler,
        const std::function<void(const unsigned int, VectorType &)>
          &initialize_dof_vector)
  {
    transfer.build(dof_handler, initialize_dof_vector);
  }

  /**
   * Transfer a global device vector to the multigrid levels.
   *
   * The source vector is staged on the host, distributed to host level
   * vectors by the host transfer, and each level is copied back to the
   * device.
   */
  template <typename Number2>
  void
  copy_to_mg(
    const DoFHandler<dim> &dof_handler,
    MGLevelObject<VectorType> &dst,
    const LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default>
      &src) const
  {
    MGLevelObject<VectorTypeHost> dst_host(dst.min_level(), dst.max_level());
    LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host>
      src_host;

    copy_to_host(src_host, src);

    // the host transfer initializes the host level vectors itself
    transfer.copy_to_mg(dof_handler, dst_host, src_host);

    // NOTE(review): assumes dst and dst_host span the same level range
    // after the host call -- confirm the host transfer does not resize
    // dst_host beyond [dst.min_level(), dst.max_level()].
    for (unsigned int l = dst.min_level(); l <= dst.max_level(); ++l)
      copy_from_host(dst[l], dst_host[l]);
  }

  /**
   * Transfer the multigrid level vectors back to a global device vector.
   */
  template <typename Number2>
  void
  copy_from_mg(
    const DoFHandler<dim> &dof_handler,
    LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default> &dst,
    const MGLevelObject<VectorType> &src) const
  {
    LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host>
      dst_host;
    MGLevelObject<VectorTypeHost> src_host(src.min_level(), src.max_level());

    dst_host.reinit(dst.get_partitioner());
    for (unsigned int l = src.min_level(); l <= src.max_level(); ++l)
      copy_to_host(src_host[l], src[l]);

    transfer.copy_from_mg(dof_handler, dst_host, src_host);

    copy_from_host(dst, dst_host);
  }

  /**
   * Prolongate a level vector from level to_level-1 to to_level.
   *
   * Only src needs a full host copy: prolongate() overwrites dst
   * completely, so initializing dst_host with the right partitioner (no
   * value transfer) is sufficient.
   */
  void
  prolongate(const unsigned int to_level,
             VectorType &dst,
             const VectorType &src) const override
  {
    VectorTypeHost dst_host;
    VectorTypeHost src_host;

    dst_host.reinit(dst.get_partitioner());
    copy_to_host(src_host, src);

    transfer.prolongate(to_level, dst_host, src_host);

    copy_from_host(dst, dst_host);
  }

  /**
   * Restrict a level vector from from_level to from_level-1 and add the
   * result to dst.
   *
   * Unlike prolongate(), dst must be copied to the host (values included)
   * because restrict_and_add() accumulates into its destination.
   */
  void
  restrict_and_add(const unsigned int from_level,
                   VectorType &dst,
                   const VectorType &src) const override
  {
    VectorTypeHost dst_host;
    VectorTypeHost src_host;

    copy_to_host(dst_host, dst);
    copy_to_host(src_host, src);

    transfer.restrict_and_add(from_level, dst_host, src_host);

    copy_from_host(dst, dst_host);
  }

private:
  /// Host transfer that performs the actual multigrid operations.
  MGTransferMatrixFree<dim, Number, dealii::MemorySpace::Host> transfer;

  /**
   * Copy the locally owned elements of a device vector into a host vector,
   * (re)initializing dst with the partitioner of src. The transfer is
   * staged through a ReadWriteVector because distributed vectors in
   * different memory spaces cannot exchange elements directly.
   */
  template <typename Number2>
  void
  copy_to_host(
    LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host> &dst,
    const LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default>
      &src) const
  {
    LinearAlgebra::ReadWriteVector<Number2> rw_vector(
      src.get_partitioner()->locally_owned_range());
    rw_vector.import_elements(src, VectorOperation::insert);

    dst.reinit(src.get_partitioner());
    dst.import_elements(rw_vector, VectorOperation::insert);
  }

  /**
   * Copy the locally owned elements of a host vector into a device vector.
   * dst is initialized from src's partitioner only when it is empty;
   * otherwise its existing layout is reused.
   */
  template <typename Number2>
  void
  copy_from_host(
    LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default> &dst,
    const LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host>
      &src) const
  {
    LinearAlgebra::ReadWriteVector<Number2> rw_vector(
      src.get_partitioner()->locally_owned_range());
    rw_vector.import_elements(src, VectorOperation::insert);

    if (dst.size() == 0)
      dst.reinit(src.get_partitioner());
    dst.import_elements(rw_vector, VectorOperation::insert);
  }
};



template <int dim, typename Number, typename TransferType>
template <typename BlockVectorType2>
Expand Down
190 changes: 190 additions & 0 deletions tests/multigrid/transfer_matrix_free_device_01.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
// ------------------------------------------------------------------------
//
// SPDX-License-Identifier: LGPL-2.1-or-later
// Copyright (C) 2016 - 2023 by the deal.II authors
//
// This file is part of the deal.II library.
//
// Part of the source code is dual licensed under Apache-2.0 WITH
// LLVM-exception OR LGPL-2.1-or-later. Detailed license information
// governing the source code and code contributions can be found in
// LICENSE.md and CONTRIBUTING.md at the top level directory of deal.II.
//
// ------------------------------------------------------------------------


// Check the device (MemorySpace::Default) specialization of
// MGTransferMatrixFree by comparison with the host implementation on a
// series of meshes with uniform refinement for FE_Q

#include <deal.II/distributed/tria.h>

#include <deal.II/fe/fe_q.h>

#include <deal.II/grid/grid_generator.h>

#include <deal.II/lac/la_parallel_vector.h>

#include <deal.II/multigrid/mg_base.h>
#include <deal.II/multigrid/mg_base.templates.h>
#include <deal.II/multigrid/mg_transfer.h>
#include <deal.II/multigrid/mg_transfer_matrix_free.h>

#include "../tests.h"

using namespace dealii;

// Copy the locally owned values of a device vector into a host vector,
// initializing dst from src's partitioner. Elements are staged through a
// ReadWriteVector since vectors in different memory spaces cannot exchange
// data directly.
template <typename Number2>
void
copy_to_host(
  LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host> &dst,
  const LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default> &src)
{
  const auto &partitioner = src.get_partitioner();
  dst.reinit(partitioner);

  LinearAlgebra::ReadWriteVector<Number2> staging(
    partitioner->locally_owned_range());
  staging.import_elements(src, VectorOperation::insert);
  dst.import_elements(staging, VectorOperation::insert);
}

// Copy the locally owned values of a host vector into a device vector.
// dst is initialized from src's partitioner only when it is still empty;
// an already-sized dst keeps its layout.
template <typename Number2>
void
copy_from_host(
  LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default> &dst,
  const LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host>
    &src)
{
  const auto &partitioner = src.get_partitioner();
  if (dst.size() == 0)
    dst.reinit(partitioner);

  LinearAlgebra::ReadWriteVector<Number2> staging(
    partitioner->locally_owned_range());
  staging.import_elements(src, VectorOperation::insert);
  dst.import_elements(staging, VectorOperation::insert);
}

// Compare prolongation and restriction of the device transfer against the
// host transfer for FE_Q elements of the given degree on a series of
// uniformly refined meshes. All reported differences should be zero.
template <int dim, typename Number>
void
check(const unsigned int fe_degree)
{
  FE_Q<dim> fe(fe_degree);
  deallog << "FE: " << fe.get_name() << std::endl;

  // run a few different sizes...
  const unsigned int sizes[] = {1, 2, 3, 4, 5, 6, 8};
  for (const unsigned int size : sizes)
    {
      // split the subdivision count into an odd base mesh plus global
      // refinements so that several multigrid levels are available
      unsigned int n_refinements = 0;
      unsigned int n_subdiv      = size;
      if (n_subdiv > 1)
        while (n_subdiv % 2 == 0)
          {
            n_refinements += 1;
            n_subdiv /= 2;
          }
      n_refinements += 3 - dim;
      if (fe_degree < 3)
        n_refinements += 1;

      parallel::distributed::Triangulation<dim> tr(
        MPI_COMM_WORLD,
        Triangulation<dim>::limit_level_difference_at_vertices,
        parallel::distributed::Triangulation<
          dim>::construct_multigrid_hierarchy);
      GridGenerator::subdivided_hyper_cube(tr, n_subdiv);
      tr.refine_global(n_refinements);
      deallog << "no. cells: " << tr.n_global_active_cells() << std::endl;

      DoFHandler<dim> mgdof(tr);
      mgdof.distribute_dofs(fe);
      mgdof.distribute_mg_dofs();

      MGConstrainedDoFs mg_constrained_dofs;
      mg_constrained_dofs.initialize(mgdof);
      mg_constrained_dofs.make_zero_boundary_constraints(mgdof, {0});

      // build host reference
      // NOTE(review): this uses the local-smoothing level layout; the
      // portable matrix-free framework currently only supports global
      // coarsening -- confirm this is the intended setup.
      MGTransferMatrixFree<dim, Number, dealii::MemorySpace::Host>
        transfer_host(mg_constrained_dofs);
      transfer_host.build(mgdof);

      // build device transfer
      MGTransferMatrixFree<dim, Number, dealii::MemorySpace::Default>
        transfer_device(mg_constrained_dofs);
      transfer_device.build(mgdof);

      // check prolongation for all levels using a random vector
      for (unsigned int level = 1;
           level < mgdof.get_triangulation().n_global_levels();
           ++level)
        {
          LinearAlgebra::distributed::Vector<Number, MemorySpace::Host> v1, v2;
          LinearAlgebra::distributed::Vector<Number, MemorySpace::Default>
            v1_cpy, v2_cpy, v3;
          v1.reinit(mgdof.locally_owned_mg_dofs(level - 1), MPI_COMM_WORLD);
          v2.reinit(mgdof.locally_owned_mg_dofs(level), MPI_COMM_WORLD);
          v3.reinit(mgdof.locally_owned_mg_dofs(level), MPI_COMM_WORLD);
          for (unsigned int i = 0; i < v1.locally_owned_size(); ++i)
            v1.local_element(i) = random_value<double>();

          copy_from_host(v1_cpy, v1);

          transfer_host.prolongate(level, v2, v1);
          transfer_device.prolongate(level, v3, v1_cpy);

          copy_from_host(v2_cpy, v2);

          // host and device results must agree
          v3 -= v2_cpy;
          deallog << "Diff prolongate l" << level << ": " << v3.l2_norm()
                  << std::endl;
        }

      // check restriction for all levels using a random vector
      for (unsigned int level = 1;
           level < mgdof.get_triangulation().n_global_levels();
           ++level)
        {
          LinearAlgebra::distributed::Vector<Number, MemorySpace::Host> v1, v2;
          LinearAlgebra::distributed::Vector<Number, MemorySpace::Default>
            v1_cpy, v2_cpy, v3;
          v1.reinit(mgdof.locally_owned_mg_dofs(level), MPI_COMM_WORLD);
          v2.reinit(mgdof.locally_owned_mg_dofs(level - 1), MPI_COMM_WORLD);
          v3.reinit(mgdof.locally_owned_mg_dofs(level - 1), MPI_COMM_WORLD);
          for (unsigned int i = 0; i < v1.locally_owned_size(); ++i)
            v1.local_element(i) = random_value<double>();
          copy_from_host(v1_cpy, v1);
          transfer_host.restrict_and_add(level, v2, v1);
          transfer_device.restrict_and_add(level, v3, v1_cpy);
          copy_from_host(v2_cpy, v2);
          v3 -= v2_cpy;
          deallog << "Diff restrict l" << level << ": " << v3.l2_norm()
                  << std::endl;

          // also exercise the additive part: start from non-zero vectors
          v2 = 1.;
          v3 = 1.;
          transfer_host.restrict_and_add(level, v2, v1);
          transfer_device.restrict_and_add(level, v3, v1_cpy);
          copy_from_host(v2_cpy, v2);
          v3 -= v2_cpy;
          deallog << "Diff restrict add l" << level << ": " << v3.l2_norm()
                  << std::endl;
        }
      deallog << std::endl;
    }
}


// Driver: run the host/device transfer comparison for a few element
// degrees in 2d and 3d with both double and float numbers. The order of
// the calls fixes the order of the log output.
int
main(int argc, char **argv)
{
  // no threading in this test...
  Utilities::MPI::MPI_InitFinalize mpi(argc, argv, 1);
  mpi_initlog();

  check<2, double>(1);
  check<2, double>(3);
  check<3, double>(1);
  check<3, double>(3);
  check<2, float>(2);
  check<3, float>(2);
}
Loading
Loading