From c11dcd63c95f1c780ae1e81666b2c704fdfbb247 Mon Sep 17 00:00:00 2001 From: Tomislav Janjusic Date: Sat, 25 Oct 2025 01:05:43 +0300 Subject: [PATCH] coll: Remove hcoll component This commit completely removes the mca/coll/hcoll component from the Open MPI source code. The hcoll component provided integration with Mellanox's Hierarchical Collectives library for collective operation offload. Changes include: - Removed ompi/mca/coll/hcoll component directory and all source files - Removed config/ompi_check_libhcoll.m4 configuration macro - Updated coll_tags.h to remove HCOLL tag space definitions - Updated coll_base_allgather.c and coll_base_allgatherv.c to use NEIGHBOR tag space instead of HCOLL tag space - Removed hcoll references from platform configuration files - Removed hcoll documentation and configuration options - Removed hcoll references from code comments Signed-off-by: Tomislav Janjusic --- .ci/mellanox/README.md | 2 +- config/ompi_check_libhcoll.m4 | 61 -- contrib/amca-param-sets/ft-mpi | 4 +- contrib/platform/intel/bend/linux | 2 +- contrib/platform/lanl/toss/README.md | 1 - .../lanl/toss/toss2-mlx-optimized.conf | 1 - contrib/platform/mellanox/optimized | 5 - docs/features/ulfm.rst | 2 +- .../configure-cli-options/networking.rst | 9 - docs/tuning-apps/coll-tuned.rst | 2 +- docs/tuning-apps/networking/cuda.rst | 28 - ompi/mca/coll/base/coll_base_allgather.c | 4 +- ompi/mca/coll/base/coll_base_allgatherv.c | 4 +- ompi/mca/coll/base/coll_tags.h | 4 +- ompi/mca/coll/hcoll/Makefile.am | 50 - ompi/mca/coll/hcoll/coll_hcoll.h | 342 ------- ompi/mca/coll/hcoll/coll_hcoll_component.c | 251 ----- ompi/mca/coll/hcoll/coll_hcoll_debug.h | 35 - ompi/mca/coll/hcoll/coll_hcoll_dtypes.h | 281 ------ ompi/mca/coll/hcoll/coll_hcoll_module.c | 478 --------- ompi/mca/coll/hcoll/coll_hcoll_ops.c | 945 ------------------ ompi/mca/coll/hcoll/coll_hcoll_rte.c | 487 --------- ompi/mca/coll/hcoll/configure.m4 | 38 - ompi/mca/coll/hcoll/owner.txt | 7 - ompi/op/op.c | 4 - oshmem/mca/scoll/basic/scoll_basic.h | 2 +- 26 files changed, 12 insertions(+), 3037 deletions(-) delete mode 100644 config/ompi_check_libhcoll.m4 delete mode 100644 ompi/mca/coll/hcoll/Makefile.am delete mode 100644 ompi/mca/coll/hcoll/coll_hcoll.h delete mode 100644 ompi/mca/coll/hcoll/coll_hcoll_component.c delete mode 100644 ompi/mca/coll/hcoll/coll_hcoll_debug.h delete mode 100644 ompi/mca/coll/hcoll/coll_hcoll_dtypes.h delete mode 100644 ompi/mca/coll/hcoll/coll_hcoll_module.c delete mode 100644 ompi/mca/coll/hcoll/coll_hcoll_ops.c delete mode 100644 ompi/mca/coll/hcoll/coll_hcoll_rte.c delete mode 100644 ompi/mca/coll/hcoll/configure.m4 delete mode 100644 ompi/mca/coll/hcoll/owner.txt diff --git a/.ci/mellanox/README.md b/.ci/mellanox/README.md index 2a9d5c09b15..c41229021ff 100644 --- a/.ci/mellanox/README.md +++ b/.ci/mellanox/README.md @@ -6,7 +6,7 @@ CI is managed by [Azure Pipelines](https://docs.microsoft.com/en-us/azure/devops/pipelines/?view=azure-devops) service. Mellanox Open MPI CI includes: -* Open MPI building with internal stable engineering versions of UCX and HCOLL. The building is run in Docker-based environment. +* Open MPI building with internal stable engineering versions of UCX. The building is run in Docker-based environment. * Sanity functional testing. ### How to Run CI Mellanox Open MPI CI is triggered upon the following events: diff --git a/config/ompi_check_libhcoll.m4 b/config/ompi_check_libhcoll.m4 deleted file mode 100644 index 2151148ab14..00000000000 --- a/config/ompi_check_libhcoll.m4 +++ /dev/null @@ -1,61 +0,0 @@ -dnl -*- shell-script -*- -dnl -dnl Copyright (c) 2011 Mellanox Technologies. All rights reserved. -dnl Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2015 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. -dnl $COPYRIGHT$ -dnl -dnl Additional copyrights may follow -dnl -dnl $HEADER$ -dnl - -# OMPI_CHECK_HCOLL(prefix, [action-if-found], [action-if-not-found]) -# -------------------------------------------------------- -# check if hcoll support can be found. sets prefix_{CPPFLAGS, -# LDFLAGS, LIBS} as needed and runs action-if-found if there is -# support, otherwise executes action-if-not-found -AC_DEFUN([OMPI_CHECK_HCOLL],[ - OPAL_VAR_SCOPE_PUSH([ompi_check_hcoll_happy CPPFLAGS_save LDFLAGS_save LIBS_save]) - - AC_ARG_WITH([hcoll], - [AS_HELP_STRING([--with-hcoll(=DIR)], - [Build hcoll (Mellanox Hierarchical Collectives) support, optionally adding - DIR/include and DIR/lib or DIR/lib64 to the search path for headers and libraries])]) - - OAC_CHECK_PACKAGE([hcoll], - [$1], - [hcoll/api/hcoll_api.h], - [hcoll], - [hcoll_get_version], - [ompi_check_hcoll_happy="yes"], - [ompi_check_hcoll_happy="no"]) - - AS_IF([test "$ompi_check_hcoll_happy" = "yes"], - [CPPFLAGS_save=$CPPFLAGS - LDFLAGS_save=$LDFLAGS - LIBS_save=$LIBS - - CPPFLAGS="${$1_CPPFLAGS} ${CPPFLAGS}" - LDFLAGS="${$1_LDFLAGS} ${LDFLAGS}" - LIBS="${$1_LIBS} ${LIBS}" - - AC_CHECK_FUNCS(hcoll_context_free, [], []) - - CPPFLAGS=$CPPFLAGS_save - LDFLAGS=$LDFLAGS_save - LIBS=$LIBS_save]) - - AS_IF([test "$ompi_check_hcoll_happy" = "yes" && test "$enable_progress_threads" = "yes"], - [AC_MSG_WARN([hcoll driver does not currently support progress threads. Disabling HCOLL.]) - ompi_check_hcoll_happy="no"]) - - AS_IF([test "$ompi_check_hcoll_happy" = "yes"], - [$2], - [AS_IF([test ! -z "$with_hcoll" && test "$with_hcoll" != "no"], - [AC_MSG_ERROR([HCOLL support requested but not found. Aborting])]) - $3]) - - OPAL_VAR_SCOPE_POP -]) diff --git a/contrib/amca-param-sets/ft-mpi b/contrib/amca-param-sets/ft-mpi index 45eac4c39ee..8ff6da9c9d2 100644 --- a/contrib/amca-param-sets/ft-mpi +++ b/contrib/amca-param-sets/ft-mpi @@ -63,7 +63,7 @@ btl=^usnic # The following frameworks/components are UNTESTED, and probably won't work. # They should run without faults, and will probably crash/deadlock after a fault. # You may try at your own risk. -# coll hcoll, portals4 +# coll portals4 # topo (all) # osc (all) # io (all) @@ -72,7 +72,7 @@ btl=^usnic # We will disable only the components for which good components are known to exist. # Other untested components are selectable but will issue a runtime warning at # initiation if FT is enabled. -coll=^hcoll,portals4 +coll=^portals4 # # The following frameworks/components are NOT WORKING. Do not enable these with FT. diff --git a/contrib/platform/intel/bend/linux b/contrib/platform/intel/bend/linux index 10580121a58..85c540b8094 100644 --- a/contrib/platform/intel/bend/linux +++ b/contrib/platform/intel/bend/linux @@ -13,7 +13,7 @@ enable_ipv6=no enable_man_pages=no enable_mpi_fortran=no enable_memchecker=no -enable_mca_no_build=memchecker,coll-adapt,coll-cuda,coll-demo,coll-ftagree,coll-han,coll-hcoll,coll-inter,coll-libnbc,coll-monitoring,coll-portals4,coll-tuned,common-monitoring,common-ompio,fbtl,fcoll,fs,io,mtl,osc,pml-cm,pml-monitoring,pml-ucx,pml-v,sharedfp,topo,vprotocol,btl-ofi,btl-portals4,btl-smcuda,btl-uct,btl-ugni,btl-usnic,common-cuda,common-ofi,common-ucx +enable_mca_no_build=memchecker,coll-adapt,coll-cuda,coll-demo,coll-ftagree,coll-han,coll-inter,coll-libnbc,coll-monitoring,coll-portals4,coll-tuned,common-monitoring,common-ompio,fbtl,fcoll,fs,io,mtl,osc,pml-cm,pml-monitoring,pml-ucx,pml-v,sharedfp,topo,vprotocol,btl-ofi,btl-portals4,btl-smcuda,btl-uct,btl-ugni,btl-usnic,common-cuda,common-ofi,common-ucx enable_contrib_no_build=libompitrace with_memory_manager=no with_devel_headers=yes diff --git a/contrib/platform/lanl/toss/README.md b/contrib/platform/lanl/toss/README.md index d677de18ff6..0a83c5b29b4 100644 --- a/contrib/platform/lanl/toss/README.md +++ b/contrib/platform/lanl/toss/README.md @@ -43,7 +43,6 @@ created. (change S to X; make sure numbers match those for the same entry in contrib/platform/lanl/toss/optimized-mlx.conf) - addition: pml = ob1 (disable MXM) - - addition: coll = ^hcoll (disable MXM) - toss3-hfi-optimized - copy of toss2-qib-optimized - toss3-hfi-optimized.conf diff --git a/contrib/platform/lanl/toss/toss2-mlx-optimized.conf b/contrib/platform/lanl/toss/toss2-mlx-optimized.conf index b44452760d7..9148f8d51f5 100644 --- a/contrib/platform/lanl/toss/toss2-mlx-optimized.conf +++ b/contrib/platform/lanl/toss/toss2-mlx-optimized.conf @@ -106,4 +106,3 @@ ras_base_launch_orted_on_hn = true ## Disable MXM pml = ob1 -coll = ^hcoll diff --git a/contrib/platform/mellanox/optimized b/contrib/platform/mellanox/optimized index fdde7cfc15f..f75b8e09c0f 100644 --- a/contrib/platform/mellanox/optimized +++ b/contrib/platform/mellanox/optimized @@ -22,11 +22,6 @@ if [ "$mellanox_autodetect" == "yes" ]; then with_ucx=$ucx_dir fi - hcoll_dir=${hcoll_dir:="$(pkg-config --variable=prefix hcoll)"} - if [ -d $hcoll_dir ]; then - with_hcoll=$hcoll_dir - fi - slurm_dir=${slurm_dir:="/usr"} if [ -f $slurm_dir/include/slurm/slurm.h ]; then with_slurm=$slurm_dir diff --git a/docs/features/ulfm.rst b/docs/features/ulfm.rst index 86815b7e435..6cb0acdd006 100644 --- a/docs/features/ulfm.rst +++ b/docs/features/ulfm.rst @@ -333,7 +333,7 @@ correctly after a failure. * ``cuda``, ``inter``, ``sync``, ``sm``: **untested** (they have not been modified to handle faults, but we expect correct post-fault behavior) - * ``hcoll``, ``portals4`` **disabled** (they have not been modified + * ``portals4`` **disabled** (it has not been modified to handle faults, and we expect unspecified post-fault behavior) * ``osc``: MPI one-sided communications diff --git a/docs/installing-open-mpi/configure-cli-options/networking.rst b/docs/installing-open-mpi/configure-cli-options/networking.rst index cabb723a482..d5ff8108929 100644 --- a/docs/installing-open-mpi/configure-cli-options/networking.rst +++ b/docs/installing-open-mpi/configure-cli-options/networking.rst @@ -14,15 +14,6 @@ can be used with ``configure``: FCA is the support library for Mellanox switches and HCAs. -* ``--with-hcoll=DIR``: - Specify the directory where the Mellanox hcoll library and header - files are located. This option is generally only necessary if the - hcoll headers and libraries are not in default compiler/linker - search paths. - - hcoll is the support library for MPI collective operation offload on - Mellanox ConnectX-3 HCAs (and later). - * ``--with-knem=DIR``: Specify the directory where the knem libraries and header files are located. This option is generally only necessary if the knem headers diff --git a/docs/tuning-apps/coll-tuned.rst b/docs/tuning-apps/coll-tuned.rst index fa9c7ba7236..1d5549256d8 100644 --- a/docs/tuning-apps/coll-tuned.rst +++ b/docs/tuning-apps/coll-tuned.rst @@ -3,7 +3,7 @@ Tuning Collectives Open MPI's ``coll`` framework provides a number of components implementing collective communication, including: ``han``, ``libnbc``, ``self``, ``ucc`` ``base``, -``hcoll``, ``sync``, ``xhc``, ``accelerator``, ``basic``, ``ftagree``, ``inter``, ``portals4``, +``sync``, ``xhc``, ``accelerator``, ``basic``, ``ftagree``, ``inter``, ``portals4``, and ``tuned``. Some of these components may not be available depending on how Open MPI was compiled and what hardware is available on the system. A run-time decision based on each component's self reported priority, selects which diff --git a/docs/tuning-apps/networking/cuda.rst b/docs/tuning-apps/networking/cuda.rst index 62e85fea4fd..8a393b3f32f 100644 --- a/docs/tuning-apps/networking/cuda.rst +++ b/docs/tuning-apps/networking/cuda.rst @@ -155,7 +155,6 @@ CUDA-aware support is available in: * The OFI (``ofi``) MTL with the CM (``cm``) PML. * Both CUDA-ized shared memory (``smcuda``) and TCP (``tcp``) BTLs with the OB1 (``ob1``) PML. -* The HCOLL (``hcoll``) COLL ///////////////////////////////////////////////////////////////////////// @@ -702,30 +701,3 @@ to query rank information and utilize that to select a GPU. MPI internal CUDA resources are released during MPI_Finalize. Thus it is an application error to call cudaDeviceReset before MPI_Finalize is called. - - -///////////////////////////////////////////////////////////////////////// - -How do I enable CUDA support in HCOLL collective component ----------------------------------------------------------- - -HCOLL component supports CUDA GPU buffers for the following -collectives: - -MPI_Allreduce -MPI_Bcast -MPI_Allgather -MPI_Ibarrier -MPI_Ibcast -MPI_Iallgather -MPI_Iallreduce - -To enable CUDA GPU buffer support in these collectives pass the -following environment variables via mpirun: - -.. code-block:: - - shell$ mpirun -x HCOLL_GPU_ENABLE=1 -x HCOLL_ENABLE_NBC=1 .. - -See `nVidia HCOLL documentation `_ -for more information. diff --git a/ompi/mca/coll/base/coll_base_allgather.c b/ompi/mca/coll/base/coll_base_allgather.c index d3e27884106..6d9bd6fcfc3 100644 --- a/ompi/mca/coll/base/coll_base_allgather.c +++ b/ompi/mca/coll/base/coll_base_allgather.c @@ -291,8 +291,8 @@ int ompi_coll_base_allgather_intra_sparbit(const void *sbuf, size_t scount, /* Since each process sends several non-contiguos blocks of data, each block sent (and therefore each send and recv call) needs a different tag. */ /* As base OpenMPI only provides one tag for allgather, we are forced to use a tag space from other components in the send and recv calls */ - MCA_PML_CALL(isend(tmpsend + (ptrdiff_t) send_disp * scount * rext, scount, rdtype, sendto, MCA_COLL_BASE_TAG_HCOLL_BASE - send_disp, MCA_PML_BASE_SEND_STANDARD, comm, requests + transfer_count)); - MCA_PML_CALL(irecv(tmprecv + (ptrdiff_t) recv_disp * rcount * rext, rcount, rdtype, recvfrom, MCA_COLL_BASE_TAG_HCOLL_BASE - recv_disp, comm, requests + data_expected - exclusion + transfer_count)); + MCA_PML_CALL(isend(tmpsend + (ptrdiff_t) send_disp * scount * rext, scount, rdtype, sendto, MCA_COLL_BASE_TAG_NEIGHBOR_BASE - send_disp, MCA_PML_BASE_SEND_STANDARD, comm, requests + transfer_count)); + MCA_PML_CALL(irecv(tmprecv + (ptrdiff_t) recv_disp * rcount * rext, rcount, rdtype, recvfrom, MCA_COLL_BASE_TAG_NEIGHBOR_BASE - recv_disp, comm, requests + data_expected - exclusion + transfer_count)); } ompi_request_wait_all(transfer_count * 2, requests, MPI_STATUSES_IGNORE); diff --git a/ompi/mca/coll/base/coll_base_allgatherv.c b/ompi/mca/coll/base/coll_base_allgatherv.c index 24cd84ec616..337e09f7c77 100644 --- a/ompi/mca/coll/base/coll_base_allgatherv.c +++ b/ompi/mca/coll/base/coll_base_allgatherv.c @@ -332,12 +332,12 @@ int ompi_coll_base_allgatherv_intra_sparbit(const void *sbuf, size_t scount, if(ompi_count_array_get(rcounts, send_disp) > 0) MCA_PML_CALL(isend(tmpsend + ompi_disp_array_get(rdispls, send_disp) * rext, ompi_count_array_get(rcounts, send_disp), rdtype, sendto, - MCA_COLL_BASE_TAG_HCOLL_BASE - send_disp, + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - send_disp, MCA_PML_BASE_SEND_STANDARD, comm, requests + step_requests++)); if(ompi_count_array_get(rcounts, recv_disp) > 0) MCA_PML_CALL(irecv(tmprecv + ompi_disp_array_get(rdispls, recv_disp) * rext, ompi_count_array_get(rcounts, recv_disp), rdtype, recvfrom, - MCA_COLL_BASE_TAG_HCOLL_BASE - recv_disp, comm, + MCA_COLL_BASE_TAG_NEIGHBOR_BASE - recv_disp, comm, requests + step_requests++)); } ompi_request_wait_all(step_requests, requests, MPI_STATUSES_IGNORE); diff --git a/ompi/mca/coll/base/coll_tags.h b/ompi/mca/coll/base/coll_tags.h index 5dc19061b37..5d3da7eafe5 100644 --- a/ompi/mca/coll/base/coll_tags.h +++ b/ompi/mca/coll/base/coll_tags.h @@ -69,10 +69,8 @@ #define MCA_COLL_BASE_TAG_NONBLOCKING_END ((-1 * INT_MAX/2) + 1) #define MCA_COLL_BASE_TAG_NEIGHBOR_BASE (MCA_COLL_BASE_TAG_NONBLOCKING_END - 1) #define MCA_COLL_BASE_TAG_NEIGHBOR_END (MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 1024) -#define MCA_COLL_BASE_TAG_HCOLL_BASE (-1 * INT_MAX/2) -#define MCA_COLL_BASE_TAG_HCOLL_END (-1 * INT_MAX) #define MCA_COLL_BASE_TAG_BASE MCA_COLL_BASE_TAG_BLOCKING_BASE -#define MCA_COLL_BASE_TAG_END MCA_COLL_BASE_TAG_HCOLL_END +#define MCA_COLL_BASE_TAG_END MCA_COLL_BASE_TAG_NEIGHBOR_END #endif /* MCA_COLL_BASE_TAGS_H */ diff --git a/ompi/mca/coll/hcoll/Makefile.am b/ompi/mca/coll/hcoll/Makefile.am deleted file mode 100644 index 37ec1c96c92..00000000000 --- a/ompi/mca/coll/hcoll/Makefile.am +++ /dev/null @@ -1,50 +0,0 @@ -# -*- shell-script -*- -# -# -# Copyright (c) 2011 Mellanox Technologies. All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# - -AM_CPPFLAGS = $(coll_hcoll_CPPFLAGS) - -coll_hcoll_sources = \ - coll_hcoll.h \ - coll_hcoll_debug.h \ - coll_hcoll_dtypes.h \ - coll_hcoll_module.c \ - coll_hcoll_component.c \ - coll_hcoll_rte.c \ - coll_hcoll_ops.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_ompi_coll_hcoll_DSO -component_noinst = -component_install = mca_coll_hcoll.la -else -component_noinst = libmca_coll_hcoll.la -component_install = -endif - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_coll_hcoll_la_SOURCES = $(coll_hcoll_sources) -mca_coll_hcoll_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ - $(coll_hcoll_LIBS) -mca_coll_hcoll_la_LDFLAGS = -module -avoid-version $(coll_hcoll_LDFLAGS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_coll_hcoll_la_SOURCES = $(coll_hcoll_sources) -libmca_coll_hcoll_la_LIBADD = $(coll_hcoll_LIBS) -libmca_coll_hcoll_la_LDFLAGS = -module -avoid-version $(coll_hcoll_LDFLAGS) - diff --git a/ompi/mca/coll/hcoll/coll_hcoll.h b/ompi/mca/coll/hcoll/coll_hcoll.h deleted file mode 100644 index 07460aeb3f8..00000000000 --- a/ompi/mca/coll/hcoll/coll_hcoll.h +++ /dev/null @@ -1,342 +0,0 @@ -/** - Copyright (c) 2011 Mellanox Technologies. All rights reserved. - Copyright (c) 2015-2019 Research Organization for Information Science - and Technology (RIST). All rights reserved. - $COPYRIGHT$ - - Additional copyrights may follow - - $HEADER$ - */ - -#ifndef MCA_COLL_FCA_H -#define MCA_COLL_FCA_H - -#include "ompi_config.h" - -#include "mpi.h" -#include "ompi/runtime/ompi_rte.h" -#include "ompi/mca/mca.h" -#include "opal/memoryhooks/memory.h" -#include "opal/mca/memory/base/base.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/request/request.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/coll/base/coll_tags.h" -#include "ompi/communicator/communicator.h" -#include "ompi/attribute/attribute.h" -#include "ompi/op/op.h" - -#include "hcoll/api/hcoll_api.h" -#include "hcoll/api/hcoll_constants.h" - - -#include "coll_hcoll_debug.h" -#ifndef HCOLL_VERSION -#define HCOLL_VERSION(major, minor) (((major)<= HCOLL_VERSION(3,2) - /* hcoll init options */ - hcoll_init_opts_t *init_opts; -#endif - - /* FCA global stuff */ - mca_coll_hcoll_ops_t hcoll_ops; - opal_free_list_t requests; - opal_free_list_t dtypes; - int derived_types_support_enabled; -}; -typedef struct mca_coll_hcoll_component_t mca_coll_hcoll_component_t; - -OMPI_DECLSPEC extern mca_coll_hcoll_component_t mca_coll_hcoll_component; - - - - -/** - * FCA enabled communicator - */ -struct mca_coll_hcoll_module_t { - mca_coll_base_module_t super; - - ompi_communicator_t *comm; - int rank; - void *hcoll_context; - /* Saved handlers - for fallback */ - mca_coll_base_module_reduce_fn_t previous_reduce; - mca_coll_base_module_t *previous_reduce_module; - mca_coll_base_module_allreduce_fn_t previous_allreduce; - mca_coll_base_module_t *previous_allreduce_module; - mca_coll_base_module_bcast_fn_t previous_bcast; - mca_coll_base_module_t *previous_bcast_module; - mca_coll_base_module_barrier_fn_t previous_barrier; - mca_coll_base_module_t *previous_barrier_module; - mca_coll_base_module_allgather_fn_t previous_allgather; - mca_coll_base_module_t *previous_allgather_module; - mca_coll_base_module_allgatherv_fn_t previous_allgatherv; - mca_coll_base_module_t *previous_allgatherv_module; - mca_coll_base_module_alltoall_fn_t previous_alltoall; - mca_coll_base_module_t *previous_alltoall_module; - mca_coll_base_module_alltoallv_fn_t previous_alltoallv; - mca_coll_base_module_t *previous_alltoallv_module; - mca_coll_base_module_alltoallw_fn_t previous_alltoallw; - mca_coll_base_module_t *previous_alltoallw_module; - mca_coll_base_module_gather_fn_t previous_gather; - mca_coll_base_module_t *previous_gather_module; - mca_coll_base_module_gatherv_fn_t previous_gatherv; - mca_coll_base_module_t *previous_gatherv_module; - mca_coll_base_module_scatterv_fn_t previous_scatterv; - mca_coll_base_module_t *previous_scatterv_module; - mca_coll_base_module_reduce_scatter_fn_t previous_reduce_scatter; - mca_coll_base_module_t *previous_reduce_scatter_module; - mca_coll_base_module_reduce_scatter_block_fn_t previous_reduce_scatter_block; - mca_coll_base_module_t *previous_reduce_scatter_block_module; - mca_coll_base_module_ibcast_fn_t previous_ibcast; - mca_coll_base_module_t *previous_ibcast_module; - mca_coll_base_module_ibarrier_fn_t previous_ibarrier; - mca_coll_base_module_t *previous_ibarrier_module; - mca_coll_base_module_iallgather_fn_t previous_iallgather; - mca_coll_base_module_t *previous_iallgather_module; - mca_coll_base_module_iallgatherv_fn_t previous_iallgatherv; - mca_coll_base_module_t *previous_iallgatherv_module; - mca_coll_base_module_iallreduce_fn_t previous_iallreduce; - mca_coll_base_module_t *previous_iallreduce_module; - mca_coll_base_module_ireduce_fn_t previous_ireduce; - mca_coll_base_module_t *previous_ireduce_module; - mca_coll_base_module_igatherv_fn_t previous_igatherv; - mca_coll_base_module_t *previous_igatherv_module; - mca_coll_base_module_ialltoall_fn_t previous_ialltoall; - mca_coll_base_module_t *previous_ialltoall_module; - mca_coll_base_module_ialltoallv_fn_t previous_ialltoallv; - mca_coll_base_module_t *previous_ialltoallv_module; -}; -typedef struct mca_coll_hcoll_module_t mca_coll_hcoll_module_t; - -OBJ_CLASS_DECLARATION(mca_coll_hcoll_module_t); - - - - -/* API functions */ -int mca_coll_hcoll_init_query(bool enable_progress_threads, bool enable_mpi_threads); -mca_coll_base_module_t *mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority); -int mca_coll_hcoll_get_lib(void); -void hcoll_rte_fns_setup(void); - - -int mca_coll_hcoll_barrier(struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_bcast(void *buff, size_t count, - struct ompi_datatype_t *datatype, int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_allgather(const void *sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void *rbuf, size_t rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_allgatherv(const void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, ompi_count_array_t rcount, - ompi_disp_array_t displs, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_gather(const void *sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void *rbuf, size_t rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_allreduce(const void *sbuf, void *rbuf, size_t count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -#if HCOLL_API > HCOLL_VERSION(4,5) -int mca_coll_hcoll_reduce_scatter_block(const void *sbuf, void *rbuf, size_t rcount, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); -int mca_coll_hcoll_reduce_scatter(const void *sbuf, void *rbuf, ompi_count_array_t rcounts, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); -#endif -int mca_coll_hcoll_reduce(const void *sbuf, void *rbuf, size_t count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_alltoall(const void *sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void* rbuf, size_t rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_alltoallv(const void *sbuf, ompi_count_array_t scounts, - ompi_disp_array_t sdisps, - struct ompi_datatype_t *sdtype, - void *rbuf, ompi_count_array_t rcounts, - ompi_disp_array_t rdisps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_gatherv(const void* sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void* rbuf, ompi_count_array_t rcounts, ompi_disp_array_t displs, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - - -int mca_coll_hcoll_scatterv(const void* sbuf, ompi_count_array_t scounts, ompi_disp_array_t displs, - struct ompi_datatype_t *sdtype, - void* rbuf, size_t rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_ibarrier(struct ompi_communicator_t *comm, - ompi_request_t** request, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_ibcast(void *buff, size_t count, - struct ompi_datatype_t *datatype, int root, - struct ompi_communicator_t *comm, - ompi_request_t** request, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_iallgather(const void *sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void *rbuf, size_t rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - ompi_request_t** request, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_iallgatherv(const void *sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void *rbuf, ompi_count_array_t rcount, - ompi_disp_array_t displs, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - ompi_request_t** request, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_iallreduce(const void *sbuf, void *rbuf, size_t count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm, - ompi_request_t** request, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_ireduce(const void *sbuf, void *rbuf, size_t count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - int root, - struct ompi_communicator_t *comm, - ompi_request_t** request, - mca_coll_base_module_t *module); - -int mca_coll_hcoll_ialltoall(const void *sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void* rbuf, size_t rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - -#if HCOLL_API >= HCOLL_VERSION(3,7) -int mca_coll_hcoll_ialltoallv(const void *sbuf, ompi_count_array_t scounts, - ompi_disp_array_t sdisps, - struct ompi_datatype_t *sdtype, - void *rbuf, ompi_count_array_t rcounts, - ompi_disp_array_t rdisps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); -#endif - -int mca_coll_hcoll_igatherv(const void* sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void* rbuf, ompi_count_array_t rcounts, ompi_disp_array_t displs, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - ompi_request_t ** request, - mca_coll_base_module_t *module); - -void mca_coll_hcoll_mem_release_cb(void *buf, size_t length, void *cbdata, bool from_alloc); -END_C_DECLS - -#endif diff --git a/ompi/mca/coll/hcoll/coll_hcoll_component.c b/ompi/mca/coll/hcoll/coll_hcoll_component.c deleted file mode 100644 index 2c276cf79a4..00000000000 --- a/ompi/mca/coll/hcoll/coll_hcoll_component.c +++ /dev/null @@ -1,251 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2011 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include - -#include -#include - -#include "coll_hcoll.h" -#include "opal/mca/installdirs/installdirs.h" -#include "coll_hcoll_dtypes.h" - -/* - * Public string showing the coll ompi_hcol component version number - */ -const char *mca_coll_hcoll_component_version_string = - "Open MPI HCOL collective MCA component version " OMPI_VERSION; - - -static int hcoll_open(void); -static int hcoll_close(void); -static int hcoll_register(void); -int mca_coll_hcoll_output = -1; -mca_coll_hcoll_component_t mca_coll_hcoll_component = { - - /* First, the mca_component_t struct containing meta information - about the component itfca */ - { - .collm_version = { - MCA_COLL_BASE_VERSION_3_0_0, - - /* Component name and version */ - .mca_component_name = "hcoll", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = hcoll_open, - .mca_close_component = hcoll_close, - .mca_register_component_params = hcoll_register, - }, - .collm_data = { - /* The component is not checkpoint ready */ - MCA_BASE_METADATA_PARAM_NONE - }, - - /* Initialization / querying functions */ - - .collm_init_query = mca_coll_hcoll_init_query, - .collm_comm_query = mca_coll_hcoll_comm_query, - }, - 90, /* priority */ - 0, /* verbose level */ - 0, /* hcoll_enable */ - NULL /*hcoll version */ -}; -MCA_BASE_COMPONENT_INIT(ompi, coll, hcoll) - - - - -int mca_coll_hcoll_get_lib(void) -{ - - memset(&mca_coll_hcoll_component.hcoll_ops, - 0, sizeof(mca_coll_hcoll_component.hcoll_ops)); - - return OMPI_SUCCESS; -} - -/* - * * Local flags - * */ -enum { - REGINT_NEG_ONE_OK = 0x01, - REGINT_GE_ZERO = 0x02, - REGINT_GE_ONE = 0x04, - REGINT_NONZERO = 0x08, - REGINT_MAX = 0x88 -}; - -enum { - REGSTR_EMPTY_OK = 0x01, - REGSTR_MAX = 0x88 -}; - - -/* - * Utility routine for integer parameter registration - */ -static int reg_int(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - int default_value, int *storage, int flags) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register( - &mca_coll_hcoll_component.super.collm_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_INT, - NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, - "ompi", "coll", "hcoll", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) { - return OMPI_SUCCESS; - } - - if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) || - (0 != (flags & REGINT_GE_ONE) && *storage < 1) || - (0 != (flags & REGINT_NONZERO) && 0 == *storage)) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - - -static int hcoll_register(void) -{ - - int ret, tmp; - - ret = OMPI_SUCCESS; - -#define CHECK(expr) do { \ - tmp = (expr); \ - if (OMPI_SUCCESS != tmp) ret = tmp; \ - } while (0) - - - CHECK(reg_int("priority",NULL, - "Priority of the hcol coll component", - 90, - &mca_coll_hcoll_component.hcoll_priority, - 0)); - - CHECK(reg_int("verbose", NULL, - "Verbose level of the hcol coll component", - 0, - &mca_coll_hcoll_component.hcoll_verbose, - 0)); - - CHECK(reg_int("enable",NULL, - "[1|0|] Enable/Disable HCOL", - 1, - &mca_coll_hcoll_component.hcoll_enable, - 0)); - - CHECK(reg_int("np",NULL, - "Minimal number of processes in the communicator" - " for the corresponding hcoll context to be created (default: 32)", - 2, - &mca_coll_hcoll_component.hcoll_np, - 0)); - - CHECK(reg_int("datatype_fallback",NULL, - "[1|0|] Enable/Disable user defined datatypes fallback", - 1, - &mca_coll_hcoll_component.hcoll_datatype_fallback, - 0)); -#if HCOLL_API >= HCOLL_VERSION(3,6) - CHECK(reg_int("dts",NULL, - "[1|0|] Enable/Disable derived types support", - 1, - &mca_coll_hcoll_component.derived_types_support_enabled, - 0)); -#else - mca_coll_hcoll_component.derived_types_support_enabled = 0; -#endif - mca_coll_hcoll_component.compiletime_version = HCOLL_VERNO_STRING; - mca_base_component_var_register(&mca_coll_hcoll_component.super.collm_version, - MCA_COMPILETIME_VER, - "Version of the libhcoll library with which Open MPI was compiled", - MCA_BASE_VAR_TYPE_VERSION_STRING, - NULL, 0, 0, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_hcoll_component.compiletime_version); - mca_coll_hcoll_component.runtime_version = hcoll_get_version(); - mca_base_component_var_register(&mca_coll_hcoll_component.super.collm_version, - MCA_RUNTIME_VER, - "Version of the libhcoll library with which Open MPI is running", - MCA_BASE_VAR_TYPE_VERSION_STRING, - NULL, 0, 0, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_hcoll_component.runtime_version); - - return ret; -} - -static int hcoll_open(void) -{ - mca_coll_hcoll_component_t *cm; - cm = &mca_coll_hcoll_component; - mca_coll_hcoll_output = opal_output_open(NULL); - opal_output_set_verbosity(mca_coll_hcoll_output, cm->hcoll_verbose); - hcoll_rte_fns_setup(); - cm->libhcoll_initialized = false; - return OMPI_SUCCESS; -} - -static int hcoll_close(void) -{ - int rc; - mca_coll_hcoll_component_t *cm; - cm = &mca_coll_hcoll_component; - - if (false == cm->libhcoll_initialized) { - return OMPI_SUCCESS; - } - - if (cm->using_mem_hooks) { - opal_mem_hooks_unregister_release(mca_coll_hcoll_mem_release_cb); - } - -#if HCOLL_API >= HCOLL_VERSION(3,2) - hcoll_free_init_opts(cm->init_opts); -#endif - - HCOL_VERBOSE(5,"HCOLL FINALIZE"); - rc = hcoll_finalize(); - OBJ_DESTRUCT(&cm->dtypes); - opal_progress_unregister(hcoll_progress_fn); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(1,"Hcol library finalize failed"); - return OMPI_ERROR; - } - - mca_base_framework_close(&opal_memory_base_framework); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/hcoll/coll_hcoll_debug.h b/ompi/mca/coll/hcoll/coll_hcoll_debug.h deleted file mode 100644 index 3897399595b..00000000000 --- a/ompi/mca/coll/hcoll/coll_hcoll_debug.h +++ /dev/null @@ -1,35 +0,0 @@ -/** - Copyright (c) 2011 Mellanox Technologies. All rights reserved. - $COPYRIGHT$ - - Additional copyrights may follow - - $HEADER$ - */ - -#ifndef COLL_HCOL_DEBUG_H -#define COLL_HCOL_DEBUG_H -#include "ompi_config.h" -#pragma GCC system_header - -#ifdef __BASE_FILE__ -#define __HCOL_FILE__ __BASE_FILE__ -#else -#define __HCOL_FILE__ __FILE__ -#endif - -#define HCOL_VERBOSE(level, format, ...) \ - opal_output_verbose(level, mca_coll_hcoll_output, "%s:%d - %s() " format, \ - __HCOL_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__) - -#define HCOL_ERROR(format, ... ) \ - opal_output_verbose(0, mca_coll_hcoll_output, "Error: %s:%d - %s() " format, \ - __HCOL_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__) - - -#define HCOL_MODULE_VERBOSE(hcoll_module, level, format, ...) \ - HCOL_VERBOSE(level, "[%p:%d] " format, (void*)(hcoll_module)->comm, (hcoll_module)->rank, ## __VA_ARGS__) - -extern int mca_coll_hcoll_output; - -#endif // COLL_HCOL_DEBUG_H diff --git a/ompi/mca/coll/hcoll/coll_hcoll_dtypes.h b/ompi/mca/coll/hcoll/coll_hcoll_dtypes.h deleted file mode 100644 index 481ff855e58..00000000000 --- a/ompi/mca/coll/hcoll/coll_hcoll_dtypes.h +++ /dev/null @@ -1,281 +0,0 @@ -#ifndef COLL_HCOLL_DTYPES_H -#define COLL_HCOLL_DTYPES_H - -/*Here comes the translation between ompi_datatype_t and dte_data_representation - This is not complete and takes into account the basic datatypes only - It is used to extract allreduce bcol functions where the arrhythmetics has to be done*/ - -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/datatype/ompi_datatype_internal.h" -#include "ompi/mca/op/op.h" -#include "hcoll/api/hcoll_dte.h" -extern int hcoll_type_attr_keyval; -extern mca_coll_hcoll_dtype_t zero_dte_mapping; -/*to keep this at hand: Ids of the basic opal_datatypes: -#define OPAL_DATATYPE_INT1 4 -#define OPAL_DATATYPE_INT2 5 -#define OPAL_DATATYPE_INT4 6 -#define OPAL_DATATYPE_INT8 7 -#define OPAL_DATATYPE_INT16 8 -#define OPAL_DATATYPE_UINT1 9 -#define OPAL_DATATYPE_UINT2 10 -#define OPAL_DATATYPE_UINT4 11 -#define OPAL_DATATYPE_UINT8 12 -#define OPAL_DATATYPE_UINT16 13 -#define OPAL_DATATYPE_FLOAT2 14 -#define OPAL_DATATYPE_FLOAT4 15 -#define OPAL_DATATYPE_FLOAT8 16 -#define OPAL_DATATYPE_FLOAT12 17 -#define OPAL_DATATYPE_FLOAT16 18 -#define OPAL_DATATYPE_SHORT_FLOAT_COMPLEX 19 -#define OPAL_DATATYPE_FLOAT_COMPLEX 20 -#define OPAL_DATATYPE_DOUBLE_COMPLEX 21 - -total 15 types -*/ - -static dte_data_representation_t* ompi_datatype_2_dte_data_rep[OPAL_DATATYPE_MAX_PREDEFINED] = { - &DTE_ZERO, /*OPAL_DATATYPE_LOOP 0 */ - &DTE_ZERO, /*OPAL_DATATYPE_END_LOOP 1 */ -#if defined(DTE_LB) - &DTE_LB, /*OPAL_DATATYPE_LB 2 */ -#else - &DTE_ZERO, -#endif -#if defined(DTE_UB) - &DTE_UB, /*OPAL_DATATYPE_UB 3 */ -#else - &DTE_ZERO, -#endif - &DTE_BYTE, /*OPAL_DATATYPE_INT1 4 */ - &DTE_INT16, /*OPAL_DATATYPE_INT2 5 */ - &DTE_INT32, /*OPAL_DATATYPE_INT4 6 */ - &DTE_INT64, /*OPAL_DATATYPE_INT8 7 */ - &DTE_INT128, /*OPAL_DATATYPE_INT16 8 */ - &DTE_UBYTE, /*OPAL_DATATYPE_UINT1 9 */ - &DTE_UINT16, /*OPAL_DATATYPE_UINT2 10 */ - &DTE_UINT32, /*OPAL_DATATYPE_UINT4 11 */ - &DTE_UINT64, /*OPAL_DATATYPE_UINT8 12 */ - &DTE_UINT128, /*OPAL_DATATYPE_UINT16 13 */ -#if defined(DTE_FLOAT16) - &DTE_FLOAT16, /*OPAL_DATATYPE_FLOAT2 14 */ -#else - &DTE_ZERO, -#endif - &DTE_FLOAT32, /*OPAL_DATATYPE_FLOAT4 15 */ - &DTE_FLOAT64, /*OPAL_DATATYPE_FLOAT8 16 */ - &DTE_FLOAT96, /*OPAL_DATATYPE_FLOAT12 17 */ - &DTE_FLOAT128, /*OPAL_DATATYPE_FLOAT16 18 */ - &DTE_ZERO, /*OPAL_DATATYPE_SHORT_FLOAT_COMPLEX 19 */ -#if defined(DTE_FLOAT32_COMPLEX) - &DTE_FLOAT32_COMPLEX, /*OPAL_DATATYPE_FLOAT_COMPLEX 20 */ -#else - &DTE_ZERO, -#endif -#if defined(DTE_FLOAT64_COMPLEX) - &DTE_FLOAT64_COMPLEX, /*OPAL_DATATYPE_DOUBLE_COMPLEX 21 */ -#else - &DTE_ZERO, -#endif -#if defined(DTE_FLOAT128_COMPLEX) - &DTE_FLOAT128_COMPLEX, /*OPAL_DATATYPE_LONG_DOUBLE_COMPLEX 22 */ -#else - &DTE_ZERO, -#endif -#if defined(DTE_BOOL) - &DTE_BOOL, /*OPAL_DATATYPE_BOOL 23 */ -#else - &DTE_ZERO, -#endif -#if defined(DTE_WCHAR) - &DTE_WCHAR, /*OPAL_DATATYPE_WCHAR 24 */ -#else - &DTE_ZERO, -#endif -#if SIZEOF_LONG == 4 - &DTE_INT32, -#else - &DTE_INT64, /*OPAL_DATATYPE_LONG 25 */ -#endif -#if SIZEOF_UNSIGNED_LONG == 4 - &DTE_UINT32, -#else - &DTE_UINT64, /*OPAL_DATATYPE_UNSIGNED_LONG 26 */ -#endif - &DTE_ZERO /*OPAL_DATATYPE_UNAVAILABLE 27 */ -}; - -enum { - TRY_FIND_DERIVED, - NO_DERIVED -}; - - -#if HCOLL_API >= HCOLL_VERSION(3,6) -static inline -void hcoll_map_derived_type(ompi_datatype_t *dtype, dte_data_representation_t *new_dte) -{ - int rc; - if (NULL == dtype->args) { - /* predefined type, shouldn't call this */ - return; - } - rc = hcoll_create_mpi_type((void*)dtype, new_dte); - if (rc != HCOLL_SUCCESS) { - /* If hcoll fails to create mpi derived type let's set zero_dte on this dtype. - This will save cycles on subsequent collective calls with the same derived - type since we will not try to create hcoll type again. */ - ompi_attr_set_c(TYPE_ATTR, (void*)dtype, &(dtype->d_keyhash), - hcoll_type_attr_keyval, &zero_dte_mapping, false); - } -} - -static dte_data_representation_t find_derived_mapping(ompi_datatype_t *dtype){ - dte_data_representation_t dte = DTE_ZERO; - mca_coll_hcoll_dtype_t *hcoll_dtype; - if (mca_coll_hcoll_component.derived_types_support_enabled) { - int map_found = 0; - ompi_attr_get_c(dtype->d_keyhash, hcoll_type_attr_keyval, - (void**)&hcoll_dtype, &map_found); - if (!map_found) - hcoll_map_derived_type(dtype, &dte); - else - dte = hcoll_dtype->type; - } - - return dte; -} - - - -static inline dte_data_representation_t -ompi_predefined_derived_2_hcoll(int ompi_id) { - switch(ompi_id) { - case OMPI_DATATYPE_MPI_FLOAT_INT: - return DTE_FLOAT_INT; - case OMPI_DATATYPE_MPI_DOUBLE_INT: - return DTE_DOUBLE_INT; - case OMPI_DATATYPE_MPI_LONG_INT: - return DTE_LONG_INT; - case OMPI_DATATYPE_MPI_SHORT_INT: - return DTE_SHORT_INT; - case OMPI_DATATYPE_MPI_LONG_DOUBLE_INT: - return DTE_LONG_DOUBLE_INT; - case OMPI_DATATYPE_MPI_2INT: - return DTE_2INT; -#if HCOLL_API >= HCOLL_VERSION(3,7) - case OMPI_DATATYPE_MPI_2INTEGER: -#if OMPI_SIZEOF_FORTRAN_INTEGER == 4 - return DTE_2INT; -#elif OMPI_SIZEOF_FORTRAN_INTEGER == 8 - return DTE_2INT64; -#else - return DTE_ZERO; -#endif - case OMPI_DATATYPE_MPI_2REAL: -#if OMPI_SIZEOF_FORTRAN_REAL == 4 - return DTE_2FLOAT32; -#elif OMPI_SIZEOF_FORTRAN_REAL == 8 - return DTE_2FLOAT64; -#else - return DTE_ZERO; -#endif - case OMPI_DATATYPE_MPI_2DBLPREC: -#if OMPI_SIZEOF_FORTRAN_DOUBLE_PRECISION == 4 - return DTE_2FLOAT32; -#elif OMPI_SIZEOF_FORTRAN_DOUBLE_PRECISION == 8 - return DTE_2FLOAT64; -#else - return DTE_ZERO; -#endif -#endif - default: - break; - } - return DTE_ZERO; -} -#endif - -static dte_data_representation_t -ompi_dtype_2_hcoll_dtype( ompi_datatype_t *dtype, - const int mode) -{ - int ompi_type_id = dtype->id; - int opal_type_id = dtype->super.id; - dte_data_representation_t dte_data_rep = DTE_ZERO; - - if (ompi_type_id < OMPI_DATATYPE_MPI_MAX_PREDEFINED && - dtype->super.flags & OMPI_DATATYPE_FLAG_PREDEFINED) { - if (opal_type_id > 0 && opal_type_id < OPAL_DATATYPE_MAX_PREDEFINED) { - dte_data_rep = *ompi_datatype_2_dte_data_rep[opal_type_id]; - } -#if HCOLL_API >= HCOLL_VERSION(3,6) - else if (TRY_FIND_DERIVED == mode){ - dte_data_rep = ompi_predefined_derived_2_hcoll(ompi_type_id); - } - } else { - if (TRY_FIND_DERIVED == mode) - dte_data_rep = find_derived_mapping(dtype); -#endif - } - if (HCOL_DTE_IS_ZERO(dte_data_rep) && TRY_FIND_DERIVED == mode && - !mca_coll_hcoll_component.hcoll_datatype_fallback) { - dte_data_rep = DTE_ZERO; - dte_data_rep.rep.in_line_rep.data_handle.in_line.in_line = 0; - dte_data_rep.rep.in_line_rep.data_handle.pointer_to_handle = (uint64_t ) &dtype->super; - } - return dte_data_rep; -} - -static hcoll_dte_op_t* ompi_op_2_hcoll_op[OMPI_OP_BASE_FORTRAN_OP_MAX + 1] = { - &hcoll_dte_op_null, /* OMPI_OP_BASE_FORTRAN_NULL = 0 */ - &hcoll_dte_op_max, /* OMPI_OP_BASE_FORTRAN_MAX */ - &hcoll_dte_op_min, /* OMPI_OP_BASE_FORTRAN_MIN */ - &hcoll_dte_op_sum, /* OMPI_OP_BASE_FORTRAN_SUM */ - &hcoll_dte_op_prod, /* OMPI_OP_BASE_FORTRAN_PROD */ - &hcoll_dte_op_land, /* OMPI_OP_BASE_FORTRAN_LAND */ - &hcoll_dte_op_band, /* OMPI_OP_BASE_FORTRAN_BAND */ - &hcoll_dte_op_lor, /* OMPI_OP_BASE_FORTRAN_LOR */ - &hcoll_dte_op_bor, /* OMPI_OP_BASE_FORTRAN_BOR */ - &hcoll_dte_op_lxor, /* OMPI_OP_BASE_FORTRAN_LXOR */ - &hcoll_dte_op_bxor, /* OMPI_OP_BASE_FORTRAN_BXOR */ - &hcoll_dte_op_null, /* OMPI_OP_BASE_FORTRAN_MAXLOC */ - &hcoll_dte_op_null, /* OMPI_OP_BASE_FORTRAN_MINLOC */ - &hcoll_dte_op_null, /* OMPI_OP_BASE_FORTRAN_REPLACE */ - &hcoll_dte_op_null, /* OMPI_OP_BASE_FORTRAN_NO_OP */ - &hcoll_dte_op_null /* OMPI_OP_BASE_FORTRAN_OP_MAX */ -}; -static hcoll_dte_op_t* ompi_op_2_hcolrte_op(ompi_op_t *op) { - if (op->o_f_to_c_index > OMPI_OP_BASE_FORTRAN_OP_MAX) { - return ompi_op_2_hcoll_op[0]; /* return null */ - } - return ompi_op_2_hcoll_op[op->o_f_to_c_index]; -} - - -#if HCOLL_API >= HCOLL_VERSION(3,6) -static int hcoll_type_attr_del_fn(MPI_Datatype type, int keyval, void *attr_val, void *extra) { - int ret = OMPI_SUCCESS; - mca_coll_hcoll_dtype_t *dtype = - (mca_coll_hcoll_dtype_t*) attr_val; - - assert(dtype); - if (&zero_dte_mapping == dtype) { - return OMPI_SUCCESS; - } - if (HCOLL_SUCCESS != (ret = hcoll_dt_destroy(dtype->type))) { - HCOL_ERROR("failed to delete type attr: hcoll_dte_destroy returned %d",ret); - return OMPI_ERROR; - } - opal_free_list_return(&mca_coll_hcoll_component.dtypes, - &dtype->super); - - return OMPI_SUCCESS; -} -#else -static int hcoll_type_attr_del_fn(MPI_Datatype type, int keyval, void *attr_val, void *extra) { - /*Do nothing - it's an old version of hcoll w/o dtypes support */ - return OMPI_SUCCESS; -} -#endif -#endif /* COLL_HCOLL_DTYPES_H */ diff --git a/ompi/mca/coll/hcoll/coll_hcoll_module.c b/ompi/mca/coll/hcoll/coll_hcoll_module.c deleted file mode 100644 index 5ca588a8154..00000000000 --- a/ompi/mca/coll/hcoll/coll_hcoll_module.c +++ /dev/null @@ -1,478 +0,0 @@ -/** - * Copyright (c) 2011 Mellanox Technologies. All rights reserved. - * Copyright (c) 2016-2022 IBM Corporation. All rights reserved. - * Copyright (c) 2017 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2018 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. - * All Rights reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "coll_hcoll.h" -#include "coll_hcoll_dtypes.h" - -int hcoll_comm_attr_keyval; -int hcoll_type_attr_keyval; -mca_coll_hcoll_dtype_t zero_dte_mapping; -/* - * Initial query function that is invoked during MPI_INIT, allowing - * this module to indicate what level of thread support it provides. - */ -int mca_coll_hcoll_init_query(bool enable_progress_threads, bool enable_mpi_threads) -{ -#if HCOLL_API < HCOLL_VERSION(3,2) - if (enable_mpi_threads) { - HCOL_VERBOSE(1, "MPI_THREAD_MULTIPLE not supported; skipping hcoll component"); - return OMPI_ERROR; - } -#endif - return OMPI_SUCCESS; -} - -static void mca_coll_hcoll_module_clear(mca_coll_hcoll_module_t *hcoll_module) -{ - hcoll_module->hcoll_context = NULL; - hcoll_module->previous_barrier = NULL; - hcoll_module->previous_bcast = NULL; - hcoll_module->previous_reduce = NULL; - hcoll_module->previous_allreduce = NULL; - hcoll_module->previous_allgather = NULL; - hcoll_module->previous_allgatherv = NULL; - hcoll_module->previous_gather = NULL; - hcoll_module->previous_gatherv = NULL; - hcoll_module->previous_scatterv = NULL; - hcoll_module->previous_alltoall = NULL; - hcoll_module->previous_alltoallv = NULL; - hcoll_module->previous_alltoallw = NULL; - hcoll_module->previous_reduce = NULL; - hcoll_module->previous_reduce_scatter = NULL; - hcoll_module->previous_reduce_scatter_block = NULL; - hcoll_module->previous_ibarrier = NULL; - hcoll_module->previous_ibcast = NULL; - hcoll_module->previous_iallreduce = NULL; - hcoll_module->previous_iallgather = NULL; - hcoll_module->previous_iallgatherv = NULL; - hcoll_module->previous_igatherv = NULL; - hcoll_module->previous_ireduce = NULL; - hcoll_module->previous_ialltoall = NULL; - hcoll_module->previous_ialltoallv = NULL; - - hcoll_module->previous_barrier_module = NULL; - hcoll_module->previous_bcast_module = NULL; - hcoll_module->previous_allreduce_module = NULL; - hcoll_module->previous_reduce_module = NULL; - hcoll_module->previous_allgather_module = NULL; - hcoll_module->previous_allgatherv_module = NULL; - hcoll_module->previous_gather_module = NULL; - hcoll_module->previous_gatherv_module = NULL; - hcoll_module->previous_scatterv_module = NULL; - hcoll_module->previous_alltoall_module = NULL; - hcoll_module->previous_alltoallv_module = NULL; - hcoll_module->previous_alltoallw_module = NULL; - hcoll_module->previous_reduce_scatter_module = NULL; - hcoll_module->previous_ibarrier_module = NULL; - hcoll_module->previous_ibcast_module = NULL; - hcoll_module->previous_iallreduce_module = NULL; - hcoll_module->previous_ireduce_module = NULL; - hcoll_module->previous_iallgather_module = NULL; - hcoll_module->previous_iallgatherv_module = NULL; - hcoll_module->previous_igatherv_module = NULL; - hcoll_module->previous_ialltoall_module = NULL; - hcoll_module->previous_ialltoallv_module = NULL; -} - -static void mca_coll_hcoll_module_construct(mca_coll_hcoll_module_t *hcoll_module) -{ - mca_coll_hcoll_module_clear(hcoll_module); -} - -void mca_coll_hcoll_mem_release_cb(void *buf, size_t length, - void *cbdata, bool from_alloc) -{ - hcoll_mem_unmap(buf, length, cbdata, from_alloc); -} - -static void mca_coll_hcoll_module_destruct(mca_coll_hcoll_module_t *hcoll_module) -{ - int context_destroyed; - - if (hcoll_module->comm == &ompi_mpi_comm_world.comm){ - if (OMPI_SUCCESS != ompi_attr_free_keyval(COMM_ATTR, &hcoll_comm_attr_keyval, 0)) { - HCOL_VERBOSE(1,"hcoll ompi_attr_free_keyval failed"); - } - } - - /* If the hcoll_context is null then we are destroying the hcoll_module - that didn't initialized fallback colls/modules. - Then just clear and return. Otherwise release module pointers and - destroy hcoll context*/ - - if (hcoll_module->hcoll_context != NULL){ - -#if !defined(HAVE_HCOLL_CONTEXT_FREE) - context_destroyed = 0; - hcoll_destroy_context(hcoll_module->hcoll_context, - (rte_grp_handle_t)hcoll_module->comm, - &context_destroyed); -#endif - } - mca_coll_hcoll_module_clear(hcoll_module); -} - -#define HCOL_INSTALL_COLL_API(__comm, __module, __api) \ - do \ - { \ - if (NULL != __module->super.coll_##__api) \ - { \ - if (comm->c_coll->coll_##__api && !comm->c_coll->coll_##__api##_module) \ - { \ - /* save the current selected collective */ \ - MCA_COLL_SAVE_API(__comm, __api, hcoll_module->previous_##__api, hcoll_module->previous_##__api##_module, "hcoll"); \ - /* install our own */ \ - MCA_COLL_INSTALL_API(__comm, __api, __module->super.coll_##__api, &__module->super, "hcoll"); \ - } \ - } \ - } while (0) - -#define HCOL_UNINSTALL_COLL_API(__comm, __module, __api) \ - do \ - { \ - if (&__module->super == comm->c_coll->coll_##__api##_module) \ - { \ - MCA_COLL_INSTALL_API(__comm, __api, __module->previous_##__api, __module->previous_##__api##_module, "hcoll"); \ - hcoll_module->previous_##__api = NULL; \ - hcoll_module->previous_##__api##_module = NULL; \ - } \ - } while (0) - -static int mca_coll_hcoll_save_coll_handlers(mca_coll_hcoll_module_t *hcoll_module) -{ - ompi_communicator_t *comm; - comm = hcoll_module->comm; - - hcoll_module->super.coll_barrier = hcoll_collectives.coll_barrier ? mca_coll_hcoll_barrier : NULL; - hcoll_module->super.coll_bcast = hcoll_collectives.coll_bcast ? mca_coll_hcoll_bcast : NULL; - hcoll_module->super.coll_allgather = hcoll_collectives.coll_allgather ? mca_coll_hcoll_allgather : NULL; - hcoll_module->super.coll_allgatherv = hcoll_collectives.coll_allgatherv ? mca_coll_hcoll_allgatherv : NULL; - hcoll_module->super.coll_allreduce = hcoll_collectives.coll_allreduce ? mca_coll_hcoll_allreduce : NULL; - hcoll_module->super.coll_alltoall = hcoll_collectives.coll_alltoall ? mca_coll_hcoll_alltoall : NULL; - hcoll_module->super.coll_alltoallv = hcoll_collectives.coll_alltoallv ? mca_coll_hcoll_alltoallv : NULL; - hcoll_module->super.coll_gatherv = hcoll_collectives.coll_gatherv ? mca_coll_hcoll_gatherv : NULL; - hcoll_module->super.coll_scatterv = hcoll_collectives.coll_scatterv ? mca_coll_hcoll_scatterv : NULL; - hcoll_module->super.coll_reduce = hcoll_collectives.coll_reduce ? mca_coll_hcoll_reduce : NULL; - hcoll_module->super.coll_ibarrier = hcoll_collectives.coll_ibarrier ? mca_coll_hcoll_ibarrier : NULL; - hcoll_module->super.coll_ibcast = hcoll_collectives.coll_ibcast ? mca_coll_hcoll_ibcast : NULL; - hcoll_module->super.coll_iallgather = hcoll_collectives.coll_iallgather ? mca_coll_hcoll_iallgather : NULL; -#if HCOLL_API >= HCOLL_VERSION(3, 5) - hcoll_module->super.coll_iallgatherv = hcoll_collectives.coll_iallgatherv ? mca_coll_hcoll_iallgatherv : NULL; -#else - hcoll_module->super.coll_iallgatherv = NULL; -#endif - hcoll_module->super.coll_iallreduce = hcoll_collectives.coll_iallreduce ? mca_coll_hcoll_iallreduce : NULL; -#if HCOLL_API >= HCOLL_VERSION(3, 5) - hcoll_module->super.coll_ireduce = hcoll_collectives.coll_ireduce ? mca_coll_hcoll_ireduce : NULL; -#else - hcoll_module->super.coll_ireduce = NULL; -#endif - hcoll_module->super.coll_gather = /*hcoll_collectives.coll_gather ? mca_coll_hcoll_gather :*/ NULL; - hcoll_module->super.coll_igatherv = hcoll_collectives.coll_igatherv ? mca_coll_hcoll_igatherv : NULL; - hcoll_module->super.coll_ialltoall = /*hcoll_collectives.coll_ialltoall ? mca_coll_hcoll_ialltoall : */ NULL; -#if HCOLL_API >= HCOLL_VERSION(3, 7) - hcoll_module->super.coll_ialltoallv = hcoll_collectives.coll_ialltoallv ? mca_coll_hcoll_ialltoallv : NULL; -#else - hcoll_module->super.coll_ialltoallv = NULL; -#endif -#if HCOLL_API > HCOLL_VERSION(4, 5) - hcoll_module->super.coll_reduce_scatter_block = hcoll_collectives.coll_reduce_scatter_block ? mca_coll_hcoll_reduce_scatter_block : NULL; - hcoll_module->super.coll_reduce_scatter = hcoll_collectives.coll_reduce_scatter ? mca_coll_hcoll_reduce_scatter : NULL; -#endif - - HCOL_INSTALL_COLL_API(comm, hcoll_module, barrier); - HCOL_INSTALL_COLL_API(comm, hcoll_module, bcast); - HCOL_INSTALL_COLL_API(comm, hcoll_module, allreduce); - HCOL_INSTALL_COLL_API(comm, hcoll_module, reduce_scatter_block); - HCOL_INSTALL_COLL_API(comm, hcoll_module, reduce_scatter); - HCOL_INSTALL_COLL_API(comm, hcoll_module, reduce); - HCOL_INSTALL_COLL_API(comm, hcoll_module, allgather); - HCOL_INSTALL_COLL_API(comm, hcoll_module, allgatherv); - HCOL_INSTALL_COLL_API(comm, hcoll_module, gatherv); - HCOL_INSTALL_COLL_API(comm, hcoll_module, scatterv); - HCOL_INSTALL_COLL_API(comm, hcoll_module, alltoall); - HCOL_INSTALL_COLL_API(comm, hcoll_module, alltoallv); - - HCOL_INSTALL_COLL_API(comm, hcoll_module, ibarrier); - HCOL_INSTALL_COLL_API(comm, hcoll_module, ibcast); - HCOL_INSTALL_COLL_API(comm, hcoll_module, iallreduce); - HCOL_INSTALL_COLL_API(comm, hcoll_module, ireduce); - HCOL_INSTALL_COLL_API(comm, hcoll_module, iallgather); - HCOL_INSTALL_COLL_API(comm, hcoll_module, iallgatherv); - HCOL_INSTALL_COLL_API(comm, hcoll_module, igatherv); - HCOL_INSTALL_COLL_API(comm, hcoll_module, ialltoall); - HCOL_INSTALL_COLL_API(comm, hcoll_module, ialltoallv); - - /* - These collectives are not yet part of hcoll, so - don't retain them on hcoll module - HCOL_INSTALL_COLL_API(comm, hcoll_module, reduce_scatter); - HCOL_INSTALL_COLL_API(comm, hcoll_module, gather); - HCOL_INSTALL_COLL_API(comm, hcoll_module, reduce); - HCOL_INSTALL_COLL_API(comm, hcoll_module, allgatherv); - HCOL_INSTALL_COLL_API(comm, hcoll_module, alltoallw); - */ - return OMPI_SUCCESS; -} - - - -/* -** Communicator free callback -*/ -static int hcoll_comm_attr_del_fn(MPI_Comm comm, int keyval, void *attr_val, void *extra) -{ - - mca_coll_hcoll_module_t *hcoll_module; - hcoll_module = (mca_coll_hcoll_module_t*) attr_val; - -#ifdef HAVE_HCOLL_CONTEXT_FREE - hcoll_context_free(hcoll_module->hcoll_context, (rte_grp_handle_t)comm); -#else - hcoll_group_destroy_notify(hcoll_module->hcoll_context); -#endif - return OMPI_SUCCESS; - -} -/* - * Initialize module on the communicator - */ -static int mca_coll_hcoll_module_enable(mca_coll_base_module_t *module, - struct ompi_communicator_t *comm) -{ - int ret; - - if (OMPI_SUCCESS != mca_coll_hcoll_save_coll_handlers((mca_coll_hcoll_module_t *)module)){ - HCOL_ERROR("coll_hcol: mca_coll_hcoll_save_coll_handlers failed"); - return OMPI_ERROR; - } - - ret = ompi_attr_set_c(COMM_ATTR, comm, &comm->c_keyhash, hcoll_comm_attr_keyval, (void *)module, false); - if (OMPI_SUCCESS != ret) { - HCOL_VERBOSE(1,"hcoll ompi_attr_set_c failed"); - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} - -static int mca_coll_hcoll_module_disable(mca_coll_base_module_t *module, - struct ompi_communicator_t *comm) -{ - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t *)module; - - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, barrier); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, bcast); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, allreduce); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, reduce_scatter_block); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, reduce_scatter); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, reduce); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, allgather); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, allgatherv); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, gatherv); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, scatterv); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, alltoall); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, alltoallv); - - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, ibarrier); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, ibcast); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, iallreduce); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, ireduce); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, iallgather); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, iallgatherv); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, igatherv); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, ialltoall); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, ialltoallv); - - /* - These collectives are not yet part of hcoll, so - don't retain them on hcoll module - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, reduce_scatter); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, gather); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, reduce); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, allgatherv); - HCOL_UNINSTALL_COLL_API(comm, hcoll_module, alltoallw); - */ - return OMPI_SUCCESS; -} - -OBJ_CLASS_INSTANCE(mca_coll_hcoll_dtype_t, - opal_free_list_item_t, - NULL,NULL); - -/* - * Invoked when there's a new communicator that has been created. - * Look at the communicator and decide which set of functions and - * priority we want to return. - */ -mca_coll_base_module_t * -mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority) -{ - mca_coll_base_module_t *module; - mca_coll_hcoll_module_t *hcoll_module; - ompi_attribute_fn_ptr_union_t del_fn; - ompi_attribute_fn_ptr_union_t copy_fn; - mca_coll_hcoll_component_t *cm; - int err; - int rc; - cm = &mca_coll_hcoll_component; - *priority = 0; - module = NULL; - - if (!cm->hcoll_enable){ - return NULL; - } - - if (OMPI_COMM_IS_INTER(comm) || ompi_comm_size(comm) < cm->hcoll_np - || ompi_comm_size(comm) < 2){ - return NULL; - } - - - if (!cm->libhcoll_initialized) - { - /* libhcoll should be initialized here since current implementation of - mxm bcol in libhcoll needs world_group fully functional during init - world_group, i.e. ompi_comm_world, is not ready at hcoll component open - call */ - opal_progress_register(hcoll_progress_fn); - - HCOL_VERBOSE(10,"Calling hcoll_init();"); -#if HCOLL_API >= HCOLL_VERSION(3,2) - hcoll_read_init_opts(&cm->init_opts); - cm->init_opts->base_tag = MCA_COLL_BASE_TAG_HCOLL_BASE; - cm->init_opts->max_tag = mca_pml.pml_max_tag; - cm->init_opts->enable_thread_support = ompi_mpi_thread_multiple; - - rc = hcoll_init_with_opts(&cm->init_opts); -#else - hcoll_set_runtime_tag_offset(MCA_COLL_BASE_TAG_HCOLL_BASE, mca_pml.pml_max_tag); - rc = hcoll_init(); -#endif - - if (HCOLL_SUCCESS != rc){ - cm->hcoll_enable = 0; - opal_progress_unregister(hcoll_progress_fn); - HCOL_ERROR("Hcol library init failed"); - return NULL; - } -#if HCOLL_API >= HCOLL_VERSION(3,2) - if (cm->init_opts->mem_hook_needed) { -#else - if (hcoll_check_mem_release_cb_needed()) { -#endif - rc = mca_base_framework_open(&opal_memory_base_framework, 0); - if (OPAL_SUCCESS != rc) { - HCOL_VERBOSE(1, "failed to initialize memory base framework: %d, " - "memory hooks will not be used", rc); - } else { - if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == - ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & - opal_mem_hooks_support_level())) { - HCOL_VERBOSE(1, "using OPAL memory hooks as external events"); - cm->using_mem_hooks = 1; - opal_mem_hooks_register_release(mca_coll_hcoll_mem_release_cb, NULL); - setenv("MXM_HCOLL_MEM_ON_DEMAND_MAP", "y", 0); - } - } - } else { - cm->using_mem_hooks = 0; - } - copy_fn.attr_communicator_copy_fn = MPI_COMM_NULL_COPY_FN; - del_fn.attr_communicator_delete_fn = hcoll_comm_attr_del_fn; - err = ompi_attr_create_keyval(COMM_ATTR, copy_fn, del_fn, &hcoll_comm_attr_keyval, NULL ,0, NULL); - if (OMPI_SUCCESS != err) { - cm->hcoll_enable = 0; - hcoll_finalize(); - opal_progress_unregister(hcoll_progress_fn); - HCOL_ERROR("Hcol comm keyval create failed"); - return NULL; - } - - if (mca_coll_hcoll_component.derived_types_support_enabled) { - zero_dte_mapping.type = DTE_ZERO; - copy_fn.attr_datatype_copy_fn = MPI_TYPE_NULL_COPY_FN; - del_fn.attr_datatype_delete_fn = hcoll_type_attr_del_fn; - err = ompi_attr_create_keyval(TYPE_ATTR, copy_fn, del_fn, &hcoll_type_attr_keyval, NULL ,0, NULL); - if (OMPI_SUCCESS != err) { - cm->hcoll_enable = 0; - hcoll_finalize(); - opal_progress_unregister(hcoll_progress_fn); - HCOL_ERROR("Hcol type keyval create failed"); - return NULL; - } - } - OBJ_CONSTRUCT(&cm->dtypes, opal_free_list_t); - opal_free_list_init(&cm->dtypes, sizeof(mca_coll_hcoll_dtype_t), - 8, OBJ_CLASS(mca_coll_hcoll_dtype_t), 0, 0, - 32, -1, 32, NULL, 0, NULL, NULL, NULL); - - } - - hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t); - if (!hcoll_module){ - if (!cm->libhcoll_initialized) { - cm->hcoll_enable = 0; - hcoll_finalize(); - opal_progress_unregister(hcoll_progress_fn); - } - return NULL; - } - - hcoll_module->comm = comm; - - HCOL_VERBOSE(10,"Creating hcoll_context for comm %p, comm_id %d, comm_size %d", - (void*)comm,comm->c_index,ompi_comm_size(comm)); - - hcoll_module->hcoll_context = - hcoll_create_context((rte_grp_handle_t)comm); - - if (NULL == hcoll_module->hcoll_context){ - HCOL_VERBOSE(1,"hcoll_create_context returned NULL"); - OBJ_RELEASE(hcoll_module); - if (!cm->libhcoll_initialized) { - cm->hcoll_enable = 0; - hcoll_finalize(); - opal_progress_unregister(hcoll_progress_fn); - } - return NULL; - } - - hcoll_module->super.coll_module_enable = mca_coll_hcoll_module_enable; - hcoll_module->super.coll_module_disable = mca_coll_hcoll_module_disable; - - *priority = cm->hcoll_priority; - module = &hcoll_module->super; - - if (!cm->libhcoll_initialized) { - cm->libhcoll_initialized = true; - } - - return module; -} - - -OBJ_CLASS_INSTANCE(mca_coll_hcoll_module_t, - mca_coll_base_module_t, - mca_coll_hcoll_module_construct, - mca_coll_hcoll_module_destruct); - -static void safety_valve(void) __opal_attribute_destructor__; -void safety_valve(void) { - opal_mem_hooks_unregister_release(mca_coll_hcoll_mem_release_cb); -} diff --git a/ompi/mca/coll/hcoll/coll_hcoll_ops.c b/ompi/mca/coll/hcoll/coll_hcoll_ops.c deleted file mode 100644 index e491899d2dd..00000000000 --- a/ompi/mca/coll/hcoll/coll_hcoll_ops.c +++ /dev/null @@ -1,945 +0,0 @@ -/** - Copyright (c) 2011 Mellanox Technologies. All rights reserved. - Copyright (c) 2015 Research Organization for Information Science - and Technology (RIST). All rights reserved. - Copyright (c) 2018 Cisco Systems, Inc. All rights reserved - $COPYRIGHT$ - - Additional copyrights may follow - - $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "coll_hcoll.h" -#include "hcoll/api/hcoll_constants.h" -#include "coll_hcoll_dtypes.h" -#include "hcoll/api/hcoll_dte.h" -int mca_coll_hcoll_barrier(struct ompi_communicator_t *comm, - mca_coll_base_module_t *module){ - int rc; - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - HCOL_VERBOSE(20,"RUNNING HCOL BARRIER"); - - if (OPAL_UNLIKELY(ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_STARTED)) { - HCOL_VERBOSE(5, "In finalize, reverting to previous barrier"); - goto orig_barrier; - } - rc = hcoll_collectives.coll_barrier(hcoll_module->hcoll_context); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK BARRIER"); - rc = hcoll_module->previous_barrier(comm,hcoll_module->previous_barrier_module); - } - return rc; -orig_barrier: - return hcoll_module->previous_barrier(comm,hcoll_module->previous_barrier_module); -} - -int mca_coll_hcoll_bcast(void *buff, size_t count, - struct ompi_datatype_t *datatype, int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - dte_data_representation_t dtype; - int rc; - HCOL_VERBOSE(20,"RUNNING HCOL BCAST"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - dtype = ompi_dtype_2_hcoll_dtype(datatype, TRY_FIND_DERIVED); - - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(dtype))) { - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: %s; calling fallback bcast;",datatype->super.name); - rc = hcoll_module->previous_bcast(buff,count,datatype,root, - comm,hcoll_module->previous_bcast_module); - return rc; - } - rc = hcoll_collectives.coll_bcast(buff,count,dtype,root,hcoll_module->hcoll_context); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK BCAST"); - rc = hcoll_module->previous_bcast(buff,count,datatype,root, - comm,hcoll_module->previous_bcast_module); - } - return rc; -} - -int mca_coll_hcoll_allgather(const void *sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void *rbuf, size_t rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - dte_data_representation_t stype; - dte_data_representation_t rtype; - int rc; - HCOL_VERBOSE(20,"RUNNING HCOL ALLGATHER"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - stype = ompi_dtype_2_hcoll_dtype(sdtype, TRY_FIND_DERIVED); - rtype = ompi_dtype_2_hcoll_dtype(rdtype, TRY_FIND_DERIVED); - if (sbuf == MPI_IN_PLACE) { - stype = rtype; - } - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback allgather;", - sdtype->super.name, - rdtype->super.name); - rc = hcoll_module->previous_allgather(sbuf,scount,sdtype, - rbuf,rcount,rdtype, - comm, - hcoll_module->previous_allgather_module); - return rc; - } - rc = hcoll_collectives.coll_allgather((void *)sbuf,scount,stype,rbuf,rcount,rtype,hcoll_module->hcoll_context); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK ALLGATHER"); - rc = hcoll_module->previous_allgather(sbuf,scount,sdtype, - rbuf,rcount,rdtype, - comm, - hcoll_module->previous_allgather_module); - } - return rc; -} - -int mca_coll_hcoll_allgatherv(const void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, ompi_count_array_t rcount, - ompi_disp_array_t displs, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - dte_data_representation_t stype; - dte_data_representation_t rtype; - int rc; - HCOL_VERBOSE(20,"RUNNING HCOL ALLGATHERV"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); - rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) - || ompi_count_array_is_64bit(rcount))) { - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback allgatherv;", - sdtype->super.name, - rdtype->super.name); - rc = hcoll_module->previous_allgatherv(sbuf,scount,sdtype, - rbuf,rcount, - displs, - rdtype, - comm, - hcoll_module->previous_allgatherv_module); - return rc; - } - rc = hcoll_collectives.coll_allgatherv((void *)sbuf, - scount, - stype, - rbuf, - (void *)ompi_count_array_ptr(rcount), - (void *)ompi_disp_array_ptr(displs), - rtype, - hcoll_module->hcoll_context); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK ALLGATHERV"); - rc = hcoll_module->previous_allgatherv(sbuf,scount,sdtype, - rbuf,rcount, - displs, - rdtype, - comm, - hcoll_module->previous_allgatherv_module); - } - return rc; -} - -int mca_coll_hcoll_gather(const void *sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void *rbuf, size_t rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - dte_data_representation_t stype; - dte_data_representation_t rtype; - int rc; - - HCOL_VERBOSE(20,"RUNNING HCOL GATHER"); - - if (root != comm->c_my_rank) { - rdtype = sdtype; - } - - stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); - rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback gather;", - sdtype->super.name, - rdtype->super.name); - rc = hcoll_module->previous_gather(sbuf,scount,sdtype, - rbuf,rcount,rdtype,root, - comm, - hcoll_module->previous_allgather_module); - return rc; - } - rc = hcoll_collectives.coll_gather((void *)sbuf,scount,stype,rbuf,rcount,rtype,root,hcoll_module->hcoll_context); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK GATHER"); - rc = hcoll_module->previous_gather((void *)sbuf,scount,sdtype, - rbuf,rcount,rdtype,root, - comm, - hcoll_module->previous_allgather_module); - } - return rc; - -} - -int mca_coll_hcoll_allreduce(const void *sbuf, void *rbuf, size_t count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - dte_data_representation_t Dtype; - hcoll_dte_op_t *Op; - int rc; - HCOL_VERBOSE(20,"RUNNING HCOL ALLREDUCE"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - Dtype = ompi_dtype_2_hcoll_dtype(dtype, NO_DERIVED); - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(Dtype))){ - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: dtype = %s; calling fallback allreduce;", - dtype->super.name); - rc = hcoll_module->previous_allreduce(sbuf,rbuf, - count,dtype,op, - comm, hcoll_module->previous_allreduce_module); - return rc; - } - - Op = ompi_op_2_hcolrte_op(op); - if (OPAL_UNLIKELY(HCOL_DTE_OP_NULL == Op->id)){ - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"ompi_op_t is not supported: op = %s; calling fallback allreduce;", - op->o_name); - rc = hcoll_module->previous_allreduce(sbuf,rbuf, - count,dtype,op, - comm, hcoll_module->previous_allreduce_module); - return rc; - } - - rc = hcoll_collectives.coll_allreduce((void *)sbuf,rbuf,count,Dtype,Op,hcoll_module->hcoll_context); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK ALLREDUCE"); - rc = hcoll_module->previous_allreduce(sbuf,rbuf, - count,dtype,op, - comm, hcoll_module->previous_allreduce_module); - } - return rc; -} - -int mca_coll_hcoll_reduce(const void *sbuf, void *rbuf, size_t count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - dte_data_representation_t Dtype; - hcoll_dte_op_t *Op; - int rc; - HCOL_VERBOSE(20,"RUNNING HCOL REDUCE"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - Dtype = ompi_dtype_2_hcoll_dtype(dtype, NO_DERIVED); - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(Dtype))){ - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: dtype = %s; calling fallback reduce;", - dtype->super.name); - rc = hcoll_module->previous_reduce(sbuf,rbuf, - count,dtype,op, - root, - comm, hcoll_module->previous_reduce_module); - return rc; - } - - Op = ompi_op_2_hcolrte_op(op); - if (OPAL_UNLIKELY(HCOL_DTE_OP_NULL == Op->id)){ - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"ompi_op_t is not supported: op = %s; calling fallback reduce;", - op->o_name); - rc = hcoll_module->previous_reduce(sbuf,rbuf, - count,dtype,op, - root, - comm, hcoll_module->previous_reduce_module); - return rc; - } - - rc = hcoll_collectives.coll_reduce((void *)sbuf,rbuf,count,Dtype,Op,root,hcoll_module->hcoll_context); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK REDUCE"); - rc = hcoll_module->previous_reduce(sbuf,rbuf, - count,dtype,op, - root, - comm, hcoll_module->previous_reduce_module); - } - return rc; -} - -int mca_coll_hcoll_alltoall(const void *sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void* rbuf, size_t rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - dte_data_representation_t stype; - dte_data_representation_t rtype; - int rc; - HCOL_VERBOSE(20,"RUNNING HCOL ALLTOALL"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); - rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback alltoall;", - sdtype->super.name, - rdtype->super.name); - rc = hcoll_module->previous_alltoall(sbuf,scount,sdtype, - rbuf,rcount,rdtype, - comm, - hcoll_module->previous_alltoall_module); - return rc; - } - rc = hcoll_collectives.coll_alltoall((void *)sbuf,scount,stype,rbuf,rcount,rtype,hcoll_module->hcoll_context); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK ALLTOALL"); - rc = hcoll_module->previous_alltoall(sbuf,scount,sdtype, - rbuf,rcount,rdtype, - comm, - hcoll_module->previous_alltoall_module); - } - return rc; -} - -int mca_coll_hcoll_alltoallv(const void *sbuf, ompi_count_array_t scounts, ompi_disp_array_t sdisps, - struct ompi_datatype_t *sdtype, - void *rbuf, ompi_count_array_t rcounts, ompi_disp_array_t rdisps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - dte_data_representation_t stype; - dte_data_representation_t rtype; - int rc; - HCOL_VERBOSE(20,"RUNNING HCOL ALLTOALLV"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); - rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) - || ompi_count_array_is_64bit(scounts))) { - HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback alltoallv;", - sdtype->super.name, - rdtype->super.name); - rc = hcoll_module->previous_alltoallv(sbuf, scounts, sdisps, sdtype, - rbuf, rcounts, rdisps, rdtype, - comm, hcoll_module->previous_alltoallv_module); - return rc; - } - rc = hcoll_collectives.coll_alltoallv((void *)sbuf, - (void *)ompi_count_array_ptr(scounts), - (void *)ompi_disp_array_ptr(sdisps), - stype, - rbuf, - (void *)ompi_count_array_ptr(rcounts), - (void *)ompi_disp_array_ptr(rdisps), - rtype, - hcoll_module->hcoll_context); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK ALLTOALLV"); - rc = hcoll_module->previous_alltoallv(sbuf, scounts, sdisps, sdtype, - rbuf, rcounts, rdisps, rdtype, - comm, hcoll_module->previous_alltoallv_module); - } - return rc; -} - -int mca_coll_hcoll_gatherv(const void* sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void* rbuf, ompi_count_array_t rcounts, ompi_disp_array_t displs, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - dte_data_representation_t stype; - dte_data_representation_t rtype; - int rc; - HCOL_VERBOSE(20,"RUNNING HCOL GATHERV"); - - if (root != comm->c_my_rank) { - rdtype = sdtype; - } - - stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); - rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); - - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) - || ompi_count_array_is_64bit(rcounts))) { - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback gatherv;", - sdtype->super.name, - rdtype->super.name); - rc = hcoll_module->previous_gatherv(sbuf,scount,sdtype, - rbuf, rcounts, displs, rdtype,root, - comm, hcoll_module->previous_gatherv_module); - return rc; - } - rc = hcoll_collectives.coll_gatherv((void *)sbuf, - scount, - stype, - rbuf, - (void *)ompi_count_array_ptr(rcounts), - (void *)ompi_disp_array_ptr(displs), - rtype, - root, - hcoll_module->hcoll_context); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK GATHERV"); - rc = hcoll_module->previous_gatherv(sbuf,scount,sdtype, - rbuf, rcounts, displs, rdtype,root, - comm, hcoll_module->previous_igatherv_module); - } - return rc; - -} - -int mca_coll_hcoll_scatterv(const void* sbuf, ompi_count_array_t scounts, ompi_disp_array_t displs, - struct ompi_datatype_t *sdtype, - void* rbuf, size_t rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - dte_data_representation_t stype; - dte_data_representation_t rtype; - int rc; - - HCOL_VERBOSE(20,"RUNNING HCOL SCATTERV"); - - if (root != comm->c_my_rank) { - sdtype = rdtype; - } - - stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); - rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); - - if (rbuf == MPI_IN_PLACE) { - assert(root == comm->c_my_rank); - rtype = stype; - } - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) - || ompi_count_array_is_64bit(scounts))) { - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback scatterv;", - sdtype->super.name, - rdtype->super.name); - rc = hcoll_module->previous_scatterv(sbuf, scounts, displs, sdtype, - rbuf, rcount, rdtype, root, - comm, hcoll_module->previous_scatterv_module); - return rc; - } - rc = hcoll_collectives.coll_scatterv((void *)sbuf, - (void *)ompi_count_array_ptr(scounts), - (void *)ompi_disp_array_ptr(displs), - stype, - rbuf, - rcount, - rtype, - root, - hcoll_module->hcoll_context); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK SCATTERV"); - rc = hcoll_module->previous_scatterv(sbuf, scounts, displs, - sdtype, - rbuf, - rcount, - rdtype, - root, - comm, - hcoll_module->previous_scatterv_module); - } - return rc; -} - -int mca_coll_hcoll_ibarrier(struct ompi_communicator_t *comm, - ompi_request_t ** request, - mca_coll_base_module_t *module) -{ - int rc; - void** rt_handle; - HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING BARRIER"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - rt_handle = (void**) request; - rc = hcoll_collectives.coll_ibarrier(hcoll_module->hcoll_context, rt_handle); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING BARRIER"); - rc = hcoll_module->previous_ibarrier(comm, request, hcoll_module->previous_ibarrier_module); - } - return rc; -} - -int mca_coll_hcoll_ibcast(void *buff, size_t count, - struct ompi_datatype_t *datatype, int root, - struct ompi_communicator_t *comm, - ompi_request_t ** request, - mca_coll_base_module_t *module) -{ - dte_data_representation_t dtype; - int rc; - void** rt_handle; - HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING BCAST"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - rt_handle = (void**) request; - dtype = ompi_dtype_2_hcoll_dtype(datatype, TRY_FIND_DERIVED); - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(dtype))){ - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: %s; calling fallback non-blocking bcast;",datatype->super.name); - rc = hcoll_module->previous_ibcast(buff,count,datatype,root, - comm, request, hcoll_module->previous_ibcast_module); - return rc; - } - rc = hcoll_collectives.coll_ibcast(buff, count, dtype, root, rt_handle, hcoll_module->hcoll_context); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING BCAST"); - rc = hcoll_module->previous_ibcast(buff,count,datatype,root, - comm, request, hcoll_module->previous_ibcast_module); - } - return rc; -} - -int mca_coll_hcoll_iallgather(const void *sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void *rbuf, size_t rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - ompi_request_t ** request, - mca_coll_base_module_t *module) -{ - dte_data_representation_t stype; - dte_data_representation_t rtype; - int rc; - void** rt_handle; - HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING ALLGATHER"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - rt_handle = (void**) request; - stype = ompi_dtype_2_hcoll_dtype(sdtype, TRY_FIND_DERIVED); - rtype = ompi_dtype_2_hcoll_dtype(rdtype, TRY_FIND_DERIVED); - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback non-blocking allgather;", - sdtype->super.name, - rdtype->super.name); - rc = hcoll_module->previous_iallgather(sbuf,scount,sdtype, - rbuf,rcount,rdtype, - comm, - request, - hcoll_module->previous_iallgather_module); - return rc; - } - rc = hcoll_collectives.coll_iallgather((void *)sbuf, scount, stype, rbuf, rcount, rtype, hcoll_module->hcoll_context, rt_handle); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING ALLGATHER"); - rc = hcoll_module->previous_iallgather(sbuf,scount,sdtype, - rbuf,rcount,rdtype, - comm, - request, - hcoll_module->previous_iallgather_module); - } - return rc; -} -#if HCOLL_API >= HCOLL_VERSION(3,5) -int mca_coll_hcoll_iallgatherv(const void *sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void *rbuf, ompi_count_array_t rcount, - ompi_disp_array_t displs, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - ompi_request_t ** request, - mca_coll_base_module_t *module) -{ - dte_data_representation_t stype; - dte_data_representation_t rtype; - int rc; - HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING ALLGATHERV"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); - rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); - void **rt_handle = (void **) request; - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) - || ompi_count_array_is_64bit(rcount))) { - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback non-blocking allgatherv;", - sdtype->super.name, - rdtype->super.name); - rc = hcoll_module->previous_iallgatherv(sbuf,scount,sdtype, - rbuf,rcount, - displs, - rdtype, - comm, - request, - hcoll_module->previous_iallgatherv_module); - return rc; - } - rc = hcoll_collectives.coll_iallgatherv((void *)sbuf, - scount, - stype, - rbuf, - (void *)ompi_count_array_ptr(rcount), - (void *)ompi_disp_array_ptr(displs), - rtype, - hcoll_module->hcoll_context, rt_handle); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING ALLGATHER"); - rc = hcoll_module->previous_iallgatherv(sbuf,scount,sdtype, - rbuf,rcount, - displs, - rdtype, - comm, - request, - hcoll_module->previous_iallgatherv_module); - } - return rc; -} -#endif -int mca_coll_hcoll_iallreduce(const void *sbuf, void *rbuf, size_t count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm, - ompi_request_t ** request, - mca_coll_base_module_t *module) -{ - dte_data_representation_t Dtype; - hcoll_dte_op_t *Op; - int rc; - void** rt_handle; - HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING ALLREDUCE"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - rt_handle = (void**) request; - Dtype = ompi_dtype_2_hcoll_dtype(dtype, NO_DERIVED); - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(Dtype))){ - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: dtype = %s; calling fallback non-blocking allreduce;", - dtype->super.name); - rc = hcoll_module->previous_iallreduce(sbuf,rbuf, - count,dtype,op, - comm, request, hcoll_module->previous_iallreduce_module); - return rc; - } - - Op = ompi_op_2_hcolrte_op(op); - if (OPAL_UNLIKELY(HCOL_DTE_OP_NULL == Op->id)){ - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"ompi_op_t is not supported: op = %s; calling fallback non-blocking allreduce;", - op->o_name); - rc = hcoll_module->previous_iallreduce(sbuf,rbuf, - count,dtype,op, - comm, request, hcoll_module->previous_iallreduce_module); - return rc; - } - - rc = hcoll_collectives.coll_iallreduce((void *)sbuf, rbuf, count, Dtype, Op, hcoll_module->hcoll_context, rt_handle); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING ALLREDUCE"); - rc = hcoll_module->previous_iallreduce(sbuf,rbuf, - count,dtype,op, - comm, request, hcoll_module->previous_iallreduce_module); - } - return rc; -} -#if HCOLL_API >= HCOLL_VERSION(3,5) -int mca_coll_hcoll_ireduce(const void *sbuf, void *rbuf, size_t count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - int root, - struct ompi_communicator_t *comm, - ompi_request_t ** request, - mca_coll_base_module_t *module) -{ - dte_data_representation_t Dtype; - hcoll_dte_op_t *Op; - int rc; - HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING REDUCE"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - Dtype = ompi_dtype_2_hcoll_dtype(dtype, NO_DERIVED); - void **rt_handle = (void**) request; - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(Dtype))){ - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: dtype = %s; calling fallback non-blocking reduce;", - dtype->super.name); - rc = hcoll_module->previous_ireduce(sbuf,rbuf,count,dtype,op, - root, - comm, request, - hcoll_module->previous_ireduce_module); - return rc; - } - - Op = ompi_op_2_hcolrte_op(op); - if (OPAL_UNLIKELY(HCOL_DTE_OP_NULL == Op->id)){ - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"ompi_op_t is not supported: op = %s; calling fallback non-blocking reduce;", - op->o_name); - rc = hcoll_module->previous_ireduce(sbuf,rbuf, - count,dtype,op, - root, - comm, request, - hcoll_module->previous_ireduce_module); - return rc; - } - - rc = hcoll_collectives.coll_ireduce((void *)sbuf,rbuf,count,Dtype,Op,root,hcoll_module->hcoll_context,rt_handle); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING REDUCE"); - rc = hcoll_module->previous_ireduce(sbuf,rbuf, - count,dtype,op, - root, - comm, - request, - hcoll_module->previous_ireduce_module); - } - return rc; -} -#endif -int mca_coll_hcoll_igatherv(const void* sbuf, size_t scount, - struct ompi_datatype_t *sdtype, - void* rbuf, ompi_count_array_t rcounts, ompi_disp_array_t displs, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - ompi_request_t ** request, - mca_coll_base_module_t *module) -{ - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - dte_data_representation_t stype; - dte_data_representation_t rtype; - int rc; - void** rt_handle; - - HCOL_VERBOSE(20,"RUNNING HCOL IGATHERV"); - - rt_handle = (void**) request; - - if (root != comm->c_my_rank) { - rdtype = sdtype; - } - - stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); - rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) - || ompi_count_array_is_64bit(rcounts))) { - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback igatherv;", - sdtype->super.name, - rdtype->super.name); - rc = hcoll_module->previous_igatherv(sbuf,scount,sdtype, - rbuf, rcounts, displs, rdtype,root, - comm, request, - hcoll_module->previous_igatherv_module); - return rc; - } - rc = hcoll_collectives.coll_igatherv((void *)sbuf, - scount, - stype, - rbuf, - (void *)ompi_count_array_ptr(rcounts), - (void *)ompi_disp_array_ptr(displs), - rtype, - root, - hcoll_module->hcoll_context, - rt_handle); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK IGATHERV"); - rc = hcoll_module->previous_igatherv(sbuf,scount,sdtype, - rbuf, rcounts, displs, rdtype,root, - comm, request, - hcoll_module->previous_igatherv_module); - } - return rc; - -} - - -#if HCOLL_API >= HCOLL_VERSION(3,7) -int mca_coll_hcoll_ialltoallv(const void *sbuf, ompi_count_array_t scounts, ompi_disp_array_t sdisps, - struct ompi_datatype_t *sdtype, - void *rbuf, ompi_count_array_t rcounts, ompi_disp_array_t rdisps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - ompi_request_t ** request, - mca_coll_base_module_t *module) -{ - dte_data_representation_t stype; - dte_data_representation_t rtype; - int rc; - HCOL_VERBOSE(20,"RUNNING HCOL IALLTOALLV"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); - rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) - || ompi_count_array_is_64bit(scounts))) { - HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback ialltoallv;", - sdtype->super.name, - rdtype->super.name); - rc = hcoll_module->previous_ialltoallv(sbuf, scounts, sdisps, sdtype, - rbuf, rcounts, rdisps, rdtype, - comm, request, hcoll_module->previous_alltoallv_module); - return rc; - } - rc = hcoll_collectives.coll_ialltoallv((void *)sbuf, - (void *)ompi_count_array_ptr(scounts), - (void *)ompi_disp_array_ptr(sdisps), - stype, - rbuf, - (void *)ompi_count_array_ptr(rcounts), - (void *)ompi_disp_array_ptr(rdisps), - rtype, - hcoll_module->hcoll_context, - (void**)request); - if (HCOLL_SUCCESS != rc){ - HCOL_VERBOSE(20,"RUNNING FALLBACK IALLTOALLV"); - rc = hcoll_module->previous_ialltoallv(sbuf, scounts, sdisps, sdtype, - rbuf, rcounts, rdisps, rdtype, - comm, request, hcoll_module->previous_alltoallv_module); - } - return rc; -} -#endif - -#if HCOLL_API > HCOLL_VERSION(4,5) -int mca_coll_hcoll_reduce_scatter_block(const void *sbuf, void *rbuf, size_t rcount, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) { - dte_data_representation_t Dtype; - hcoll_dte_op_t *Op; - int rc; - HCOL_VERBOSE(20,"RUNNING HCOL REDUCE SCATTER BLOCK"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - Dtype = ompi_dtype_2_hcoll_dtype(dtype, NO_DERIVED); - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(Dtype))){ - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: dtype = %s; calling fallback allreduce;", - dtype->super.name); - goto fallback; - } - - Op = ompi_op_2_hcolrte_op(op); - if (OPAL_UNLIKELY(HCOL_DTE_OP_NULL == Op->id)){ - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"ompi_op_t is not supported: op = %s; calling fallback allreduce;", - op->o_name); - goto fallback; - } - - rc = hcoll_collectives.coll_reduce_scatter_block((void *)sbuf,rbuf,rcount,Dtype,Op,hcoll_module->hcoll_context); - if (HCOLL_SUCCESS != rc){ - fallback: - HCOL_VERBOSE(20,"RUNNING FALLBACK ALLREDUCE"); - rc = hcoll_module->previous_reduce_scatter_block(sbuf,rbuf, - rcount,dtype,op, - comm, hcoll_module->previous_allreduce_module); - } - return rc; -} - -int mca_coll_hcoll_reduce_scatter(const void *sbuf, void *rbuf, ompi_count_array_t rcounts, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) { - dte_data_representation_t Dtype; - hcoll_dte_op_t *Op; - int rc; - HCOL_VERBOSE(20,"RUNNING HCOL REDUCE SCATTER"); - mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - Dtype = ompi_dtype_2_hcoll_dtype(dtype, NO_DERIVED); - if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(Dtype) || ompi_count_array_is_64bit(rcounts))){ - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"Ompi_datatype is not supported: dtype = %s; calling fallback allreduce;", - dtype->super.name); - goto fallback; - } - - Op = ompi_op_2_hcolrte_op(op); - if (OPAL_UNLIKELY(HCOL_DTE_OP_NULL == Op->id)){ - /*If we are here then datatype is not simple predefined datatype */ - /*In future we need to add more complex mapping to the dte_data_representation_t */ - /* Now use fallback */ - HCOL_VERBOSE(20,"ompi_op_t is not supported: op = %s; calling fallback allreduce;", - op->o_name); - goto fallback; - } - - rc = hcoll_collectives.coll_reduce_scatter((void *)sbuf, - rbuf, - (void *)ompi_count_array_ptr(rcounts), - Dtype, - Op, - hcoll_module->hcoll_context); - if (HCOLL_SUCCESS != rc){ - fallback: - HCOL_VERBOSE(20,"RUNNING FALLBACK ALLREDUCE"); - rc = hcoll_module->previous_reduce_scatter(sbuf,rbuf, - rcounts,dtype,op, - comm, hcoll_module->previous_allreduce_module); - } - return rc; -} -#endif diff --git a/ompi/mca/coll/hcoll/coll_hcoll_rte.c b/ompi/mca/coll/hcoll/coll_hcoll_rte.c deleted file mode 100644 index 882880f645e..00000000000 --- a/ompi/mca/coll/hcoll/coll_hcoll_rte.c +++ /dev/null @@ -1,487 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015-2019 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#ifdef HAVE_SYS_MMAN_H -#include -#endif -#include -#include - -#include "coll_hcoll.h" - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/datatype/ompi_datatype_internal.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/coll/base/coll_base_util.h" - -#include "hcoll/api/hcoll_dte.h" -#include "hcoll/api/hcoll_api.h" -#include "hcoll/api/hcoll_constants.h" -#include "coll_hcoll_dtypes.h" -/* - * Local functions - */ - - -static int recv_nb(dte_data_representation_t data , - size_t count , - void *buffer, - rte_ec_handle_t , - rte_grp_handle_t , - uint32_t tag, - rte_request_handle_t * req); - -static int send_nb(dte_data_representation_t data, - size_t count, - void *buffer, - rte_ec_handle_t ec_h, - rte_grp_handle_t grp_h, - uint32_t tag, rte_request_handle_t *req); - -static int test( rte_request_handle_t * request , - int * completed ); - -static int ec_handle_compare( rte_ec_handle_t handle_1 , - rte_grp_handle_t - group_handle_1 , - rte_ec_handle_t handle_2 , - rte_grp_handle_t - group_handle_2 ); - -static int get_ec_handles( int num_ec , - int * ec_indexes , - rte_grp_handle_t , - rte_ec_handle_t * ec_handles ); - -#if 0 /* This callback is not used */ -static int get_my_ec(rte_grp_handle_t , rte_ec_handle_t *ec_handle); -#endif - -static int group_size ( rte_grp_handle_t group ); -static int my_rank (rte_grp_handle_t grp_h); -static int ec_on_local_node (rte_ec_handle_t ec, rte_grp_handle_t group); -static rte_grp_handle_t get_world_group_handle(void); -static uint32_t jobid(void); - -static void progress(void){ - opal_progress(); -} - -static void* get_coll_handle(void); -static int coll_handle_test(void* handle); -static void coll_handle_free(void *handle); -static void coll_handle_complete(void *handle); -static int group_id(rte_grp_handle_t group); - -static int world_rank(rte_grp_handle_t grp_h, rte_ec_handle_t ec); -/* Module Constructors */ -#if HCOLL_API >= HCOLL_VERSION(3,6) -static int get_mpi_type_envelope(void *mpi_type, int *num_integers, - int *num_addresses, int *num_datatypes, - hcoll_mpi_type_combiner_t *combiner); -static int get_mpi_type_contents(void *mpi_type, int max_integers, int max_addresses, - int max_datatypes, int *array_of_integers, - void *array_of_addresses, void *array_of_datatypes); -static int get_hcoll_type(void *mpi_type, dte_data_representation_t *hcoll_type); -static int set_hcoll_type(void *mpi_type, dte_data_representation_t hcoll_type); -static int get_mpi_constants(size_t *mpi_datatype_size, - int *mpi_order_c, int *mpi_order_fortran, - int *mpi_distribute_block, - int *mpi_distribute_cyclic, - int *mpi_distribute_none, - int *mpi_distribute_dflt_darg); -#endif - -static void init_module_fns(void){ - hcoll_rte_functions.send_fn = send_nb; - hcoll_rte_functions.recv_fn = recv_nb; - hcoll_rte_functions.ec_cmp_fn = ec_handle_compare; - hcoll_rte_functions.get_ec_handles_fn = get_ec_handles; - hcoll_rte_functions.rte_group_size_fn = group_size; - hcoll_rte_functions.test_fn = test; - hcoll_rte_functions.rte_my_rank_fn = my_rank; - hcoll_rte_functions.rte_ec_on_local_node_fn = ec_on_local_node; - hcoll_rte_functions.rte_world_group_fn = get_world_group_handle; - hcoll_rte_functions.rte_jobid_fn = jobid; - hcoll_rte_functions.rte_progress_fn = progress; - hcoll_rte_functions.rte_get_coll_handle_fn = get_coll_handle; - hcoll_rte_functions.rte_coll_handle_test_fn = coll_handle_test; - hcoll_rte_functions.rte_coll_handle_free_fn = coll_handle_free; - hcoll_rte_functions.rte_coll_handle_complete_fn = coll_handle_complete; - hcoll_rte_functions.rte_group_id_fn = group_id; - hcoll_rte_functions.rte_world_rank_fn = world_rank; -#if HCOLL_API >= HCOLL_VERSION(3,6) - hcoll_rte_functions.rte_get_mpi_type_envelope_fn = get_mpi_type_envelope; - hcoll_rte_functions.rte_get_mpi_type_contents_fn = get_mpi_type_contents; - hcoll_rte_functions.rte_get_hcoll_type_fn = get_hcoll_type; - hcoll_rte_functions.rte_set_hcoll_type_fn = set_hcoll_type; - hcoll_rte_functions.rte_get_mpi_constants_fn = get_mpi_constants; -#endif -} - - -void hcoll_rte_fns_setup(void) -{ - init_module_fns(); - OBJ_CONSTRUCT(&mca_coll_hcoll_component.requests, opal_free_list_t); - opal_free_list_init(&(mca_coll_hcoll_component.requests), - sizeof(ompi_coll_base_nbc_request_t), - opal_cache_line_size, OBJ_CLASS(ompi_coll_base_nbc_request_t), - /* no payload data */ - 0, 0, 10, -1, 10, - /* No Mpool or init function */ - NULL, 0, NULL, NULL, NULL); -} - -static int recv_nb(struct dte_data_representation_t data, - size_t count, - void *buffer, - rte_ec_handle_t ec_h, - rte_grp_handle_t grp_h, - uint32_t tag, - rte_request_handle_t *req) -{ - ompi_communicator_t *comm = (ompi_communicator_t *)grp_h; - - if (NULL == ec_h.handle && -1 != ec_h.rank) { - fprintf(stderr,"***Error in hcolrte_rml_recv_nb: wrong null argument: " - "ec_h.handle = %p, ec_h.rank = %d\n",ec_h.handle,ec_h.rank); - return HCOLL_ERROR; - } - assert(HCOL_DTE_IS_INLINE(data)); - /*do inline nb recv*/ - size_t size; - ompi_request_t *ompi_req; - - if (!buffer && !HCOL_DTE_IS_ZERO(data)) { - fprintf(stderr, "***Error in hcolrte_rml_recv_nb: buffer pointer is NULL" - " for non DTE_ZERO INLINE data representation\n"); - return HCOLL_ERROR; - } - size = (size_t)data.rep.in_line_rep.data_handle.in_line.packed_size*count/8; - - HCOL_VERBOSE(30,"PML_IRECV: dest = %d: buf = %p: size = %zu: comm = %p", - ec_h.rank, buffer, size, (void *)comm); - if (MCA_PML_CALL(irecv(buffer,size,&(ompi_mpi_unsigned_char.dt),ec_h.rank, - tag,comm,&ompi_req))) - { - return HCOLL_ERROR; - } - req->data = (void *)ompi_req; - req->status = HCOLRTE_REQUEST_ACTIVE; - - return HCOLL_SUCCESS; -} - - -static int send_nb( dte_data_representation_t data, - size_t count, - void *buffer, - rte_ec_handle_t ec_h, - rte_grp_handle_t grp_h, - uint32_t tag, - rte_request_handle_t *req) -{ - ompi_communicator_t *comm = (ompi_communicator_t *)grp_h; - - if (! ec_h.handle) { - fprintf(stderr,"***Error in hcolrte_rml_send_nb: wrong null argument: " - "ec_h.handle = %p, ec_h.rank = %d\n",ec_h.handle,ec_h.rank); - return HCOLL_ERROR; - } - assert(HCOL_DTE_IS_INLINE(data)); - /*do inline nb recv*/ - size_t size; - ompi_request_t *ompi_req; - if (!buffer && !HCOL_DTE_IS_ZERO(data)) { - fprintf(stderr, "***Error in hcolrte_rml_send_nb: buffer pointer is NULL" - " for non DTE_ZERO INLINE data representation\n"); - return HCOLL_ERROR; - } - size = (size_t)data.rep.in_line_rep.data_handle.in_line.packed_size*count/8; - HCOL_VERBOSE(30,"PML_ISEND: dest = %d: buf = %p: size = %zu: comm = %p", - ec_h.rank, buffer, size, (void *)comm); - if (MCA_PML_CALL(isend(buffer,size,&(ompi_mpi_unsigned_char.dt),ec_h.rank, - tag,MCA_PML_BASE_SEND_STANDARD,comm,&ompi_req))) - { - return HCOLL_ERROR; - } - req->data = (void *)ompi_req; - req->status = HCOLRTE_REQUEST_ACTIVE; - return HCOLL_SUCCESS; -} - -static int test( rte_request_handle_t * request , - int * completed ) -{ - ompi_request_t * ompi_req = (ompi_request_t *)request->data; - if (HCOLRTE_REQUEST_ACTIVE != request->status){ - *completed = true; - return HCOLL_SUCCESS; - } - - /*ompi_request_test(&ompi_req,completed,MPI_STATUS_IGNORE); */ - *completed = REQUEST_COMPLETE(ompi_req); - if (*completed){ - ompi_request_free(&ompi_req); - request->status = HCOLRTE_REQUEST_DONE; - } - - return HCOLL_SUCCESS; -} - -static int ec_handle_compare( rte_ec_handle_t handle_1 , - rte_grp_handle_t - group_handle_1 , - rte_ec_handle_t handle_2 , - rte_grp_handle_t - group_handle_2 ) -{ - return handle_1.handle == handle_2.handle; -} - -static int get_ec_handles( int num_ec , - int * ec_indexes , - rte_grp_handle_t grp_h, - rte_ec_handle_t * ec_handles ) -{ - int i; - ompi_communicator_t *comm = (ompi_communicator_t *)grp_h; - for (i=0; ihandle = (void *)my_proc; - ec_handle->rank = my_rank; - return HCOLL_SUCCESS; -} -#endif - -static int group_size ( rte_grp_handle_t grp_h ) -{ - return ompi_comm_size((ompi_communicator_t *)grp_h); -} - -static int my_rank (rte_grp_handle_t grp_h ) -{ - return ompi_comm_rank((ompi_communicator_t *)grp_h); -} - -static int ec_on_local_node (rte_ec_handle_t ec, rte_grp_handle_t group){ - ompi_proc_t *proc = (ompi_proc_t *)ec.handle; - return OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags); -} - - -static rte_grp_handle_t get_world_group_handle(void) -{ - return (rte_grp_handle_t)&ompi_mpi_comm_world.comm; -} - -static uint32_t jobid(void){ - return OMPI_PROC_MY_NAME->jobid; -} - -static int group_id(rte_grp_handle_t group){ - return ((ompi_communicator_t *)group)->c_index; -} - -static int -request_free(struct ompi_request_t **ompi_req) -{ - ompi_request_t *req = *ompi_req; - if (!coll_handle_test(req)) { - return OMPI_ERROR; - } - coll_handle_free(req); - *ompi_req = MPI_REQUEST_NULL; - return OMPI_SUCCESS; -} - -static void* get_coll_handle(void) -{ - ompi_coll_base_nbc_request_t *ompi_req; - opal_free_list_item_t *item; - item = opal_free_list_wait (&(mca_coll_hcoll_component.requests)); - if (OPAL_UNLIKELY(NULL == item)) { - HCOL_ERROR("Wait for free list failed.\n"); - return NULL; - } - ompi_req = (ompi_coll_base_nbc_request_t *)item; - OMPI_REQUEST_INIT(&ompi_req->super,false); - ompi_req->super.req_complete_cb = NULL; - ompi_req->super.req_complete_cb_data = NULL; - ompi_req->super.req_status.MPI_ERROR = MPI_SUCCESS; - ompi_req->super.req_state = OMPI_REQUEST_ACTIVE; - ompi_req->super.req_free = request_free; - ompi_req->super.req_type = OMPI_REQUEST_COLL; - ompi_req->data.refcounted.objs.objs[0] = NULL; - ompi_req->data.refcounted.objs.objs[1] = NULL; - return (void *)ompi_req; -} - -static int coll_handle_test(void* handle) -{ - ompi_request_t *ompi_req = (ompi_request_t *)handle; - return REQUEST_COMPLETE(ompi_req);; -} - -static void coll_handle_free(void *handle){ - ompi_request_t *ompi_req = (ompi_request_t *)handle; - opal_free_list_return (&mca_coll_hcoll_component.requests, - (opal_free_list_item_t *)ompi_req); -} - -static void coll_handle_complete(void *handle) -{ - ompi_request_t *ompi_req = (ompi_request_t *)handle; - ompi_request_complete(ompi_req,true); -} - - -static int world_rank(rte_grp_handle_t grp_h, rte_ec_handle_t ec){ - ompi_proc_t *proc = (ompi_proc_t *)ec.handle; - return ((ompi_process_name_t*)&proc->super.proc_name)->vpid; -} - -#if HCOLL_API >= HCOLL_VERSION(3,6) -hcoll_mpi_type_combiner_t ompi_combiner_2_hcoll_combiner(int ompi_combiner) { - switch (ompi_combiner) - { - case MPI_COMBINER_CONTIGUOUS: - return HCOLL_MPI_COMBINER_CONTIGUOUS; - case MPI_COMBINER_VECTOR: - return HCOLL_MPI_COMBINER_VECTOR; - case MPI_COMBINER_HVECTOR: - return HCOLL_MPI_COMBINER_HVECTOR; - case MPI_COMBINER_INDEXED: - return HCOLL_MPI_COMBINER_INDEXED; - case MPI_COMBINER_HINDEXED_INTEGER: - case MPI_COMBINER_HINDEXED: - return HCOLL_MPI_COMBINER_HINDEXED; - case MPI_COMBINER_DUP: - return HCOLL_MPI_COMBINER_DUP; - case MPI_COMBINER_INDEXED_BLOCK: - return HCOLL_MPI_COMBINER_INDEXED_BLOCK; - case MPI_COMBINER_HINDEXED_BLOCK: - return HCOLL_MPI_COMBINER_HINDEXED_BLOCK; - case MPI_COMBINER_SUBARRAY: - return HCOLL_MPI_COMBINER_SUBARRAY; - case MPI_COMBINER_DARRAY: - return HCOLL_MPI_COMBINER_DARRAY; - case MPI_COMBINER_F90_REAL: - return HCOLL_MPI_COMBINER_F90_REAL; - case MPI_COMBINER_F90_COMPLEX: - return HCOLL_MPI_COMBINER_F90_COMPLEX; - case MPI_COMBINER_F90_INTEGER: - return HCOLL_MPI_COMBINER_F90_INTEGER; - case MPI_COMBINER_RESIZED: - return HCOLL_MPI_COMBINER_RESIZED; - case MPI_COMBINER_STRUCT: - case MPI_COMBINER_STRUCT_INTEGER: - return HCOLL_MPI_COMBINER_STRUCT; - default: - break; - } - return HCOLL_MPI_COMBINER_LAST; -} - - -static int get_mpi_type_envelope(void *mpi_type, int *num_integers, - int *num_addresses, int *num_datatypes, - hcoll_mpi_type_combiner_t *combiner) { - int ompi_combiner, rc; - rc = ompi_datatype_get_args( (ompi_datatype_t*)mpi_type, 0, num_integers, NULL, - num_addresses, NULL, - num_datatypes, NULL, &ompi_combiner); - *combiner = ompi_combiner_2_hcoll_combiner(ompi_combiner); - return rc == OMPI_SUCCESS ? HCOLL_SUCCESS : HCOLL_ERROR; -} - -static int get_mpi_type_contents(void *mpi_type, int max_integers, int max_addresses, - int max_datatypes, int *array_of_integers, - void *array_of_addresses, void *array_of_datatypes) { - int rc; - rc = ompi_datatype_get_args( (ompi_datatype_t*)mpi_type, 1, &max_integers, array_of_integers, - &max_addresses, array_of_addresses, - &max_datatypes, array_of_datatypes, NULL ); - return rc == OMPI_SUCCESS ? HCOLL_SUCCESS : HCOLL_ERROR; -} - -static int get_hcoll_type(void *mpi_type, dte_data_representation_t *hcoll_type) { - *hcoll_type = ompi_dtype_2_hcoll_dtype((ompi_datatype_t*)mpi_type, TRY_FIND_DERIVED); - return HCOL_DTE_IS_ZERO((*hcoll_type)) ? HCOLL_ERR_NOT_FOUND : HCOLL_SUCCESS; -} - -static int set_hcoll_type(void *mpi_type, dte_data_representation_t hcoll_type) { - int rc; - mca_coll_hcoll_dtype_t *hcoll_dtype = (mca_coll_hcoll_dtype_t*) - opal_free_list_get(&mca_coll_hcoll_component.dtypes); - ompi_datatype_t *dtype = (ompi_datatype_t*)mpi_type; - hcoll_dtype->type = hcoll_type; - rc = ompi_attr_set_c(TYPE_ATTR, (void*)dtype, &(dtype->d_keyhash), hcoll_type_attr_keyval, (void *)hcoll_dtype, false); - if (OMPI_SUCCESS != rc) { - HCOL_VERBOSE(1,"hcoll ompi_attr_set_c failed for derived dtype"); - goto Cleanup; - } - return HCOLL_SUCCESS; -Cleanup: - opal_free_list_return(&mca_coll_hcoll_component.dtypes, - &hcoll_dtype->super); - return rc; -} - -static int get_mpi_constants(size_t *mpi_datatype_size, - int *mpi_order_c, int *mpi_order_fortran, - int *mpi_distribute_block, - int *mpi_distribute_cyclic, - int *mpi_distribute_none, - int *mpi_distribute_dflt_darg) { - *mpi_datatype_size = sizeof(MPI_Datatype); - *mpi_order_c = MPI_ORDER_C; - *mpi_order_fortran = MPI_ORDER_FORTRAN; - *mpi_distribute_block = MPI_DISTRIBUTE_BLOCK; - *mpi_distribute_cyclic = MPI_DISTRIBUTE_CYCLIC; - *mpi_distribute_none = MPI_DISTRIBUTE_NONE; - *mpi_distribute_dflt_darg = MPI_DISTRIBUTE_DFLT_DARG; - return HCOLL_SUCCESS; -} - -#endif diff --git a/ompi/mca/coll/hcoll/configure.m4 b/ompi/mca/coll/hcoll/configure.m4 deleted file mode 100644 index 3d2c2b3a581..00000000000 --- a/ompi/mca/coll/hcoll/configure.m4 +++ /dev/null @@ -1,38 +0,0 @@ -# -*- shell-script -*- -# -# -# Copyright (c) 2011 Mellanox Technologies. All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - - -# MCA_coll_hcoll_CONFIG([action-if-can-compile], -# [action-if-cant-compile]) -# ------------------------------------------------ -AC_DEFUN([MCA_ompi_coll_hcoll_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/coll/hcoll/Makefile]) - - OMPI_CHECK_HCOLL([coll_hcoll], - [coll_hcoll_happy="yes"], - [coll_hcoll_happy="no"]) - - AS_IF([test "$coll_hcoll_happy" = "yes"], - [coll_hcoll_WRAPPER_EXTRA_LDFLAGS="$coll_hcoll_LDFLAGS" - coll_hcoll_CPPFLAGS="$coll_hcoll_CPPFLAGS" - coll_hcoll_WRAPPER_EXTRA_LIBS="$coll_hcoll_LIBS" - $1], - [$2]) - - # substitute in the things needed to build hcoll - AC_SUBST([coll_hcoll_CPPFLAGS]) - AC_SUBST([coll_hcoll_LDFLAGS]) - AC_SUBST([coll_hcoll_LIBS]) -])dnl - diff --git a/ompi/mca/coll/hcoll/owner.txt b/ompi/mca/coll/hcoll/owner.txt deleted file mode 100644 index 8dacea65a6d..00000000000 --- a/ompi/mca/coll/hcoll/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: MELLANOX -status: active diff --git a/ompi/op/op.c b/ompi/op/op.c index c800dc0a1cb..11064ec6f59 100644 --- a/ompi/op/op.c +++ b/ompi/op/op.c @@ -284,10 +284,6 @@ int ompi_op_init(void) FLAGS, "MPI_NO_OP")) { return OMPI_ERROR; }else{ -/* This code is placed back here to support - * HCOL allreduce at the moment. It is a part of bgate repository only. This conflict with OMPI v1.7 - * is to be resolved some other way. - * */ ompi_mpi_op_null.op.op_type = OMPI_OP_NULL; ompi_mpi_op_max.op.op_type = OMPI_OP_MAX; ompi_mpi_op_min.op.op_type = OMPI_OP_MIN; diff --git a/oshmem/mca/scoll/basic/scoll_basic.h b/oshmem/mca/scoll/basic/scoll_basic.h index 73365c62b4e..77760bbae93 100644 --- a/oshmem/mca/scoll/basic/scoll_basic.h +++ b/oshmem/mca/scoll/basic/scoll_basic.h @@ -23,7 +23,7 @@ BEGIN_C_DECLS * In case of shmem, the implementation of broadcast doesn't require * each process to know message size ( just root should know). * It differs from other implementations, so it may cause problems if - * BCAST_FUNC is a callback to another implementation (e.g, fca, hcoll). + * BCAST_FUNC is a callback to another implementation (e.g, fca). * So we replace a callback (group->g_scoll.scoll_[func]) * with a corresponding basic function. */