-
Notifications
You must be signed in to change notification settings - Fork 2
Add support for device vectors through a workaround #23
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1745,6 +1745,178 @@ MGTransferMatrixFree<dim, Number, MemorySpace>::interpolate_to_mg( | |
| } | ||
|
|
||
|
|
||
| /** | ||
| * Template specialization for device vectors. | ||
| * Currently works by transferring device vectors back to the host and | ||
| * performing the transfer operation on the host. Eventually this should be | ||
| * replaced by all operations occurring on the device. | ||
| */ | ||
|
|
||
| template <int dim, typename Number> | ||
| class MGTransferMatrixFree<dim, Number, MemorySpace::Default> | ||
| : public MGTransferBase< | ||
| LinearAlgebra::distributed::Vector<Number, MemorySpace::Default>> | ||
| { | ||
| public: | ||
| using VectorType = | ||
| LinearAlgebra::distributed::Vector<Number, MemorySpace::Default>; | ||
| using VectorTypeHost = | ||
| LinearAlgebra::distributed::Vector<Number, dealii::MemorySpace::Host>; | ||
|
|
||
| MGTransferMatrixFree() | ||
| : transfer() | ||
| {} | ||
|
|
||
| MGTransferMatrixFree(const MGConstrainedDoFs &mg_constrained_dofs) | ||
| : transfer(mg_constrained_dofs) | ||
| {} | ||
|
|
||
| MGTransferMatrixFree( | ||
| const MGLevelObject<MGTwoLevelTransfer<dim, VectorTypeHost>> &mg_transfers, | ||
| const std::function<void(const unsigned int, VectorTypeHost &)> | ||
| &initialize_dof_vector) | ||
| : transfer(mg_transfers, initialize_dof_vector) | ||
| {} | ||
|
|
||
| void | ||
| build(const std::vector<std::shared_ptr<const Utilities::MPI::Partitioner>> | ||
| &external_partitioners = {}) | ||
| { | ||
| transfer.build(external_partitioners); | ||
| } | ||
|
|
||
| void | ||
| build(const std::function<void(const unsigned int, VectorType &)> | ||
| &initialize_dof_vector) | ||
| { | ||
| transfer.build(initialize_dof_vector); | ||
| } | ||
|
|
||
| void | ||
| build(const DoFHandler<dim> &dof_handler, | ||
| const std::vector<std::shared_ptr<const Utilities::MPI::Partitioner>> | ||
| &external_partitioners = {}) | ||
| { | ||
| transfer.build(dof_handler, external_partitioners); | ||
| } | ||
|
|
||
| void | ||
| build(const DoFHandler<dim> &dof_handler, | ||
| const std::function<void(const unsigned int, VectorType &)> | ||
| &initialize_dof_vector) | ||
| { | ||
| transfer.build(dof_handler, initialize_dof_vector); | ||
| } | ||
|
|
||
|
|
||
| template <typename Number2> | ||
| void | ||
| copy_to_mg( | ||
| const DoFHandler<dim> &dof_handler, | ||
| MGLevelObject<VectorType> &dst, | ||
| const LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default> | ||
| &src) const | ||
| { | ||
| MGLevelObject<VectorTypeHost> dst_host(dst.min_level(), dst.max_level()); | ||
| LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host> | ||
| src_host; | ||
|
|
||
| copy_to_host(src_host, src); | ||
|
|
||
| transfer.copy_to_mg(dof_handler, dst_host, src_host); | ||
|
|
||
| for (unsigned int l = dst.min_level(); l <= dst.max_level(); ++l) | ||
| copy_from_host(dst[l], dst_host[l]); | ||
| } | ||
|
|
||
| template <typename Number2> | ||
| void | ||
| copy_from_mg( | ||
| const DoFHandler<dim> &dof_handler, | ||
| LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default> &dst, | ||
| const MGLevelObject<VectorType> &src) const | ||
| { | ||
| LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host> | ||
| dst_host; | ||
| MGLevelObject<VectorTypeHost> src_host(src.min_level(), src.max_level()); | ||
|
|
||
| dst_host.reinit(dst.get_partitioner()); | ||
| for (unsigned int l = src.min_level(); l <= src.max_level(); ++l) | ||
| copy_to_host(src_host[l], src[l]); | ||
|
|
||
| transfer.copy_from_mg(dof_handler, dst_host, src_host); | ||
|
|
||
| copy_from_host(dst, dst_host); | ||
| } | ||
|
|
||
| void | ||
| prolongate(const unsigned int to_level, | ||
| VectorType &dst, | ||
| const VectorType &src) const override | ||
| { | ||
| VectorTypeHost dst_host; | ||
| VectorTypeHost src_host; | ||
|
|
||
| dst_host.reinit(dst.get_partitioner()); | ||
| copy_to_host(src_host, src); | ||
|
|
||
| transfer.prolongate(to_level, dst_host, src_host); | ||
|
|
||
| copy_from_host(dst, dst_host); | ||
| } | ||
|
|
||
| void | ||
| restrict_and_add(const unsigned int from_level, | ||
| VectorType &dst, | ||
| const VectorType &src) const override | ||
| { | ||
| VectorTypeHost dst_host; | ||
| VectorTypeHost src_host; | ||
|
|
||
| copy_to_host(dst_host, dst); | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. here it is necessary though |
||
| copy_to_host(src_host, src); | ||
|
|
||
| transfer.restrict_and_add(from_level, dst_host, src_host); | ||
|
|
||
| copy_from_host(dst, dst_host); | ||
| } | ||
|
|
||
| private: | ||
| MGTransferMatrixFree<dim, Number, dealii::MemorySpace::Host> transfer; | ||
|
|
||
| template <typename Number2> | ||
| void | ||
| copy_to_host( | ||
| LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host> &dst, | ||
| const LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default> | ||
| &src) const | ||
| { | ||
| LinearAlgebra::ReadWriteVector<Number2> rw_vector( | ||
| src.get_partitioner()->locally_owned_range()); | ||
| rw_vector.import_elements(src, VectorOperation::insert); | ||
|
|
||
| dst.reinit(src.get_partitioner()); | ||
| dst.import_elements(rw_vector, VectorOperation::insert); | ||
| } | ||
|
|
||
| template <typename Number2> | ||
| void | ||
| copy_from_host( | ||
| LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default> &dst, | ||
| const LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host> | ||
| &src) const | ||
| { | ||
| LinearAlgebra::ReadWriteVector<Number2> rw_vector( | ||
| src.get_partitioner()->locally_owned_range()); | ||
| rw_vector.import_elements(src, VectorOperation::insert); | ||
|
|
||
| if (dst.size() == 0) | ||
| dst.reinit(src.get_partitioner()); | ||
| dst.import_elements(rw_vector, VectorOperation::insert); | ||
| } | ||
| }; | ||
|
|
||
|
|
||
|
|
||
| template <int dim, typename Number, typename TransferType> | ||
| template <typename BlockVectorType2> | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,190 @@ | ||
| // ------------------------------------------------------------------------ | ||
| // | ||
| // SPDX-License-Identifier: LGPL-2.1-or-later | ||
| // Copyright (C) 2016 - 2023 by the deal.II authors | ||
| // | ||
| // This file is part of the deal.II library. | ||
| // | ||
| // Part of the source code is dual licensed under Apache-2.0 WITH | ||
| // LLVM-exception OR LGPL-2.1-or-later. Detailed license information | ||
| // governing the source code and code contributions can be found in | ||
| // LICENSE.md and CONTRIBUTING.md at the top level directory of deal.II. | ||
| // | ||
| // ------------------------------------------------------------------------ | ||
|
|
||
|
|
||
| // Check MGTransferMatrixFree by comparison with MGTransferPrebuilt on a | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update this comment to include device |
||
| // series of meshes with uniform meshes for FE_Q | ||
|
|
||
| #include <deal.II/distributed/tria.h> | ||
|
|
||
| #include <deal.II/fe/fe_q.h> | ||
|
|
||
| #include <deal.II/grid/grid_generator.h> | ||
|
|
||
| #include <deal.II/lac/la_parallel_vector.h> | ||
|
|
||
| #include <deal.II/multigrid/mg_base.h> | ||
| #include <deal.II/multigrid/mg_base.templates.h> | ||
| #include <deal.II/multigrid/mg_transfer.h> | ||
| #include <deal.II/multigrid/mg_transfer_matrix_free.h> | ||
|
|
||
| #include "../tests.h" | ||
|
|
||
| using namespace dealii; | ||
|
|
||
| template <typename Number2> | ||
| void | ||
| copy_to_host( | ||
| LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host> &dst, | ||
| const LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default> &src) | ||
| { | ||
| LinearAlgebra::ReadWriteVector<Number2> rw_vector( | ||
| src.get_partitioner()->locally_owned_range()); | ||
| rw_vector.import_elements(src, VectorOperation::insert); | ||
|
|
||
| dst.reinit(src.get_partitioner()); | ||
| dst.import_elements(rw_vector, VectorOperation::insert); | ||
| } | ||
|
|
||
| template <typename Number2> | ||
| void | ||
| copy_from_host( | ||
| LinearAlgebra::distributed::Vector<Number2, MemorySpace::Default> &dst, | ||
| const LinearAlgebra::distributed::Vector<Number2, dealii::MemorySpace::Host> | ||
| &src) | ||
| { | ||
| LinearAlgebra::ReadWriteVector<Number2> rw_vector( | ||
| src.get_partitioner()->locally_owned_range()); | ||
| rw_vector.import_elements(src, VectorOperation::insert); | ||
|
|
||
| if (dst.size() == 0) | ||
| dst.reinit(src.get_partitioner()); | ||
| dst.import_elements(rw_vector, VectorOperation::insert); | ||
| } | ||
|
|
||
| template <int dim, typename Number> | ||
| void | ||
| check(const unsigned int fe_degree) | ||
| { | ||
| FE_Q<dim> fe(fe_degree); | ||
| deallog << "FE: " << fe.get_name() << std::endl; | ||
|
|
||
| // run a few different sizes... | ||
| unsigned int sizes[] = {1, 2, 3, 4, 5, 6, 8}; | ||
| for (unsigned int cycle = 0; cycle < sizeof(sizes) / sizeof(unsigned int); | ||
| ++cycle) | ||
| { | ||
| unsigned int n_refinements = 0; | ||
| unsigned int n_subdiv = sizes[cycle]; | ||
| if (n_subdiv > 1) | ||
| while (n_subdiv % 2 == 0) | ||
| { | ||
| n_refinements += 1; | ||
| n_subdiv /= 2; | ||
| } | ||
| n_refinements += 3 - dim; | ||
| if (fe_degree < 3) | ||
| n_refinements += 1; | ||
| parallel::distributed::Triangulation<dim> tr( | ||
| MPI_COMM_WORLD, | ||
| Triangulation<dim>::limit_level_difference_at_vertices, | ||
| parallel::distributed::Triangulation< | ||
| dim>::construct_multigrid_hierarchy); | ||
| GridGenerator::subdivided_hyper_cube(tr, n_subdiv); | ||
| tr.refine_global(n_refinements); | ||
| deallog << "no. cells: " << tr.n_global_active_cells() << std::endl; | ||
|
|
||
| DoFHandler<dim> mgdof(tr); | ||
| mgdof.distribute_dofs(fe); | ||
| mgdof.distribute_mg_dofs(); | ||
|
|
||
| MGConstrainedDoFs mg_constrained_dofs; | ||
| mg_constrained_dofs.initialize(mgdof); | ||
| mg_constrained_dofs.make_zero_boundary_constraints(mgdof, {0}); | ||
|
|
||
| // build host reference | ||
| MGTransferMatrixFree<dim, Number, dealii::MemorySpace::Host> | ||
| transfer_host(mg_constrained_dofs); | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like you are using local smoothing multi grid here. Not wrong, but portable MatrixFree is currently only doing global coarsening. |
||
| transfer_host.build(mgdof); | ||
|
|
||
| // build device transfer | ||
| MGTransferMatrixFree<dim, Number, dealii::MemorySpace::Default> | ||
| transfer_device(mg_constrained_dofs); | ||
| transfer_device.build(mgdof); | ||
|
|
||
| // check prolongation for all levels using random vector | ||
| for (unsigned int level = 1; | ||
| level < mgdof.get_triangulation().n_global_levels(); | ||
| ++level) | ||
| { | ||
| LinearAlgebra::distributed::Vector<Number, MemorySpace::Host> v1, v2; | ||
| LinearAlgebra::distributed::Vector<Number, MemorySpace::Default> | ||
| v1_cpy, v2_cpy, v3; | ||
| v1.reinit(mgdof.locally_owned_mg_dofs(level - 1), MPI_COMM_WORLD); | ||
| v2.reinit(mgdof.locally_owned_mg_dofs(level), MPI_COMM_WORLD); | ||
| v3.reinit(mgdof.locally_owned_mg_dofs(level), MPI_COMM_WORLD); | ||
| for (unsigned int i = 0; i < v1.locally_owned_size(); ++i) | ||
| v1.local_element(i) = random_value<double>(); | ||
|
|
||
| copy_from_host(v1_cpy, v1); | ||
|
|
||
| transfer_host.prolongate(level, v2, v1); | ||
| transfer_device.prolongate(level, v3, v1_cpy); | ||
|
|
||
| copy_from_host(v2_cpy, v2); | ||
|
|
||
| v3 -= v2_cpy; | ||
| deallog << "Diff prolongate l" << level << ": " << v3.l2_norm() | ||
| << std::endl; | ||
| } | ||
|
|
||
| // check restriction for all levels using random vector | ||
| for (unsigned int level = 1; | ||
| level < mgdof.get_triangulation().n_global_levels(); | ||
| ++level) | ||
| { | ||
| LinearAlgebra::distributed::Vector<Number, MemorySpace::Host> v1, v2; | ||
| LinearAlgebra::distributed::Vector<Number, MemorySpace::Default> | ||
| v1_cpy, v2_cpy, v3; | ||
| v1.reinit(mgdof.locally_owned_mg_dofs(level), MPI_COMM_WORLD); | ||
| v2.reinit(mgdof.locally_owned_mg_dofs(level - 1), MPI_COMM_WORLD); | ||
| v3.reinit(mgdof.locally_owned_mg_dofs(level - 1), MPI_COMM_WORLD); | ||
| for (unsigned int i = 0; i < v1.locally_owned_size(); ++i) | ||
| v1.local_element(i) = random_value<double>(); | ||
| copy_from_host(v1_cpy, v1); | ||
| transfer_host.restrict_and_add(level, v2, v1); | ||
| transfer_device.restrict_and_add(level, v3, v1_cpy); | ||
| copy_from_host(v2_cpy, v2); | ||
| v3 -= v2_cpy; | ||
| deallog << "Diff restrict l" << level << ": " << v3.l2_norm() | ||
| << std::endl; | ||
|
|
||
| v2 = 1.; | ||
| v3 = 1.; | ||
| transfer_host.restrict_and_add(level, v2, v1); | ||
| transfer_device.restrict_and_add(level, v3, v1_cpy); | ||
| copy_from_host(v2_cpy, v2); | ||
| v3 -= v2_cpy; | ||
| deallog << "Diff restrict add l" << level << ": " << v3.l2_norm() | ||
| << std::endl; | ||
| } | ||
| deallog << std::endl; | ||
| } | ||
| } | ||
|
|
||
|
|
||
| int | ||
| main(int argc, char **argv) | ||
| { | ||
| // no threading in this test... | ||
| Utilities::MPI::MPI_InitFinalize mpi(argc, argv, 1); | ||
| mpi_initlog(); | ||
|
|
||
| check<2, double>(1); | ||
| check<2, double>(3); | ||
| check<3, double>(1); | ||
| check<3, double>(3); | ||
| check<2, float>(2); | ||
| check<3, float>(2); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is a bit surprising that we initialize with host vectors here, right? That would mean if we make this work on device later, user code needs to change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes this is a good point but rather annoying to work around. These inputs are used to initialize the underlying MGTransferMatrixFree class that is defined on the host which does the "workaround" part of this. To switch these to be device elements we would need some method by which we could convert them to equivalent objects on the host which I'm not sure how easy or feasible that is.