diff --git a/3rd-party/openpmix b/3rd-party/openpmix index 85a25ee5151..0dadf0def57 160000 --- a/3rd-party/openpmix +++ b/3rd-party/openpmix @@ -1 +1 @@ -Subproject commit 85a25ee515174eff39b9f1457cb0b2e8f2fc1db6 +Subproject commit 0dadf0def579ae4a760d32d80d084e84cc3573e3 diff --git a/3rd-party/prrte b/3rd-party/prrte index e6b8de51806..5b666f963f8 160000 --- a/3rd-party/prrte +++ b/3rd-party/prrte @@ -1 +1 @@ -Subproject commit e6b8de5180696434fafc93df6280bbef7f8d574d +Subproject commit 5b666f963f8d68a4751d64bef3b3294665cea9e9 diff --git a/NEWS b/NEWS index acfc4ffaf33..16d926bf56a 100644 --- a/NEWS +++ b/NEWS @@ -28,6 +28,8 @@ Additional copyrights may follow $HEADER$ +exicting new stuff + =========================================================================== This file contains the main features as well as overviews of specific diff --git a/config/ompi_pmix_add_args.m4 b/config/ompi_pmix_add_args.m4 new file mode 100644 index 00000000000..e2b554c0840 --- /dev/null +++ b/config/ompi_pmix_add_args.m4 @@ -0,0 +1,45 @@ +AC_DEFUN([OMPI_PMIX_ADD_ARGS],[ + opal_show_subtitle "PMIx Configuration options" + + AC_ARG_WITH([pmix-platform-patches-dir], + [AC_HELP_STRING([--with-pmix-platform-patches-dir=DIR], + [Location of the platform patches directory. If you use this option, you must also use --with-pmix-platform.])]) + + AC_ARG_WITH([pmix-platform], + [AC_HELP_STRING([--with-pmix-platform=FILE], + [Load options for build from FILE. Options on the + command line not in FILE are used. Options on the + command line and in FILE are replaced by what is on the command line])]) + + AC_ARG_WITH([jansson], + [AC_HELP_STRING([--with-jansson(=DIR)], + [Build jansson support (default=no), optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) + AC_ARG_WITH([jansson-libdir], + [AC_HELP_STRING([--with-jansson-libdir=DIR], + [Search for Jansson libraries in DIR])]) + + AC_ARG_WITH([curl], + [AC_HELP_STRING([--with-curl(=DIR)], + [Build curl support (default=no), optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) + AC_ARG_WITH([curl-libdir], + [AC_HELP_STRING([--with-curl-libdir=DIR], + [Search for Curl libraries in DIR])]) + + AC_ARG_WITH([zlib], + [AC_HELP_STRING([--with-zlib=DIR], + [Search for zlib headers and libraries in DIR ])]) + AC_ARG_WITH([zlib-libdir], + [AC_HELP_STRING([--with-zlib-libdir=DIR], + [Search for zlib libraries in DIR ])]) + + AC_ARG_WITH([munge], + [AC_HELP_STRING([--with-munge=DIR], + [Search for munge headers and libraries in DIR ])]) + AC_ARG_WITH([munge-libdir], + [AC_HELP_STRING([--with-munge-libdir=DIR], + [Search for munge libraries in DIR ])]) + + AC_ARG_ENABLE([dstore-pthlck], + [AC_HELP_STRING([--disable-dstore-pthlck], + [Disable pthread-based locking in dstor (default: enabled)])]) +]) diff --git a/config/ompi_prrte_add_args.m4 b/config/ompi_prrte_add_args.m4 new file mode 100644 index 00000000000..78a4144d6cc --- /dev/null +++ b/config/ompi_prrte_add_args.m4 @@ -0,0 +1,53 @@ +AC_DEFUN([OMPI_PRRTE_ADD_ARGS],[ + opal_show_subtitle "PRRTE Configuration options" + + AC_ARG_WITH([alps], + [AC_HELP_STRING([--with-alps(=DIR|yes|no)], + [Build with ALPS scheduler component, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries (default: auto)])],[],with_alps=auto) + AC_ARG_WITH([alps-libdir], + [AC_HELP_STRING([--with-alps-libdir=DIR], + [Location of alps libraries (alpslli, alpsutil) (default: /usr/lib/alps 
(/opt/cray/xe-sysroot/default/user on eslogin nodes))])]) + + AC_ARG_WITH([sge], + [AC_HELP_STRING([--with-sge], + [Build SGE or Grid Engine support (default: no)])]) + + AC_ARG_WITH([tm], + [AC_HELP_STRING([--with-tm(=DIR)], + [Build TM (Torque, PBSPro, and compatible) support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) + + AC_ARG_WITH([moab], + [AC_HELP_STRING([--with-moab], + [Build MOAB scheduler component (default: yes)])]) + AC_ARG_WITH([moab-libdir], + [AC_HELP_STRING([--with-moab-libdir=DIR], + [Search for Moab libraries in DIR])]) + + AC_ARG_WITH([lsf], + [AC_HELP_STRING([--with-lsf(=DIR)], + [Build LSF support])]) + AC_ARG_WITH([lsf-libdir], + [AC_HELP_STRING([--with-lsf-libdir=DIR], + [Search for LSF libraries in DIR])]) + + AC_ARG_WITH([slurm], + [AC_HELP_STRING([--with-slurm], + [Build SLURM scheduler component (default: yes)])]) + + AC_ARG_WITH([singularity], + [AC_HELP_STRING([--with-singularity(=DIR)], + [Build support for the Singularity container, optionally adding DIR to the search path])]) + + AC_ARG_WITH([prte-platform-patches-dir], + [AC_HELP_STRING([--with-prte-platform-patches-dir=DIR], + [Location of the platform patches directory. If you use this option, you must also use --with-prte-platform.])]) + AC_ARG_WITH([prte-platform], + [AC_HELP_STRING([--with-prte-platform=FILE], + [Load options for build from FILE. Options on the + command line not in FILE are used. Options on the + command line and in FILE are replaced by what is on the command line])]) + + AC_ARG_ENABLE([prte-ft], + [AC_HELP_STRING([--enable-prte-ft], + [Enable PRRTE fault tolerance support (default: disabled)])]) +]) diff --git a/config/opal_check_cuda.m4 b/config/opal_check_cuda.m4 index fd7816e3ea7..67059a8c851 100644 --- a/config/opal_check_cuda.m4 +++ b/config/opal_check_cuda.m4 @@ -91,8 +91,8 @@ AS_IF([test "$opal_check_cuda_happy" = "yes"], # If we have CUDA support, check to see if we have support for SYNC_MEMOPS # which was first introduced in CUDA 6.0. AS_IF([test "$opal_check_cuda_happy"="yes"], - AC_CHECK_DECL([CU_POINTER_ATTRIBUTE_SYNC_MEMOPS], [CUDA_SYNC_MEMOPS=1], [CUDA_SYNC_MEMOPS=0], - [#include <$opal_cuda_incdir/cuda.h>]), + [AC_CHECK_DECL([CU_POINTER_ATTRIBUTE_SYNC_MEMOPS], [CUDA_SYNC_MEMOPS=1], [CUDA_SYNC_MEMOPS=0], + [#include <$opal_cuda_incdir/cuda.h>])], []) # If we have CUDA support, check to see if we have CUDA 6.0 or later. diff --git a/config/opal_check_pmi.m4 b/config/opal_check_pmi.m4 deleted file mode 100644 index 6f18c02cd1c..00000000000 --- a/config/opal_check_pmi.m4 +++ /dev/null @@ -1,290 +0,0 @@ -# -*- shell-script ; indent-tabs-mode:nil -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2019 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights -# reserved. -# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. -# Copyright (c) 2014-2018 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# Copyright (c) 2016 IBM Corporation. 
All rights reserved. -# Copyright (c) 2020 Triad National Security, LLC. All rights -# reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# define an internal function for checking the existence -# and validity of an external PMIx library -# -# OPAL_CHECK_PMIX_LIB(installdir, libdir, [action-if-valid], [action-if-not-valid]) -AC_DEFUN([OPAL_CHECK_PMIX_LIB],[ - - OPAL_VAR_SCOPE_PUSH([opal_external_pmix_save_CPPFLAGS opal_external_pmix_save_LDFLAGS opal_external_pmix_save_LIBS]) - opal_external_pmix_happy=no - - opal_external_pmix_save_CPPFLAGS=$CPPFLAGS - opal_external_pmix_save_LDFLAGS=$LDFLAGS - opal_external_pmix_save_LIBS=$LIBS - - # Make sure we have the headers and libs in the correct location - AC_MSG_CHECKING([for pmix.h in $1]) - files=`ls $1/pmix.h 2> /dev/null | wc -l` - AS_IF([test "$files" -gt 0], - [AC_MSG_RESULT([found]) - pmix_ext_install_incdir=$1 - opal_external_pmix_header_happy=yes], - [AC_MSG_RESULT([not found]) - AC_MSG_CHECKING([for pmix.h in $1/include]) - files=`ls $1/include/pmix.h 2> /dev/null | wc -l` - AS_IF([test "$files" -gt 0], - [AC_MSG_RESULT([found]) - pmix_ext_install_incdir=$1/include - opal_external_pmix_header_happy=yes], - [AC_MSG_RESULT([not found]) - opal_external_pmix_header_happy=no])]) - - AS_IF([test "$opal_external_pmix_header_happy" = "yes"], - [AS_IF([test -n "$2"], - [AC_MSG_CHECKING([libpmix.* in $2]) - files=`ls $2/libpmix.* 2> /dev/null | wc -l` - AS_IF([test "$files" -gt 0], - [AC_MSG_RESULT([found]) - pmix_ext_install_libdir=$2], - [AC_MSG_RESULT([not found]) - AC_MSG_CHECKING([libpmix.* in $2/lib64]) - files=`ls $2/lib64/libpmix.* 2> /dev/null | wc -l` - AS_IF([test "$files" -gt 0], - [AC_MSG_RESULT([found]) - pmix_ext_install_libdir=$2/lib64], - [AC_MSG_RESULT([not found]) - AC_MSG_CHECKING([libpmix.* in $2/lib]) - files=`ls $2/lib/libpmix.* 2> /dev/null | wc -l` - AS_IF([test "$files" -gt 0], - [AC_MSG_RESULT([found]) - pmix_ext_install_libdir=$2/lib], - [AC_MSG_RESULT([not found]) - AC_MSG_ERROR([Cannot continue])])])])], - [# check for presence of lib64 directory - if found, see if the - # desired library is present and matches our build requirements - AC_MSG_CHECKING([libpmix.* in $1/lib64]) - files=`ls $1/lib64/libpmix.* 2> /dev/null | wc -l` - AS_IF([test "$files" -gt 0], - [AC_MSG_RESULT([found]) - pmix_ext_install_libdir=$1/lib64], - [AC_MSG_RESULT([not found]) - AC_MSG_CHECKING([libpmix.* in $1/lib]) - files=`ls $1/lib/libpmix.* 2> /dev/null | wc -l` - AS_IF([test "$files" -gt 0], - [AC_MSG_RESULT([found]) - pmix_ext_install_libdir=$1/lib], - [AC_MSG_RESULT([not found]) - AC_MSG_ERROR([Cannot continue])])])]) - - # check the version - # if the pmix_version.h file does not exist, then - # this must be from a pre-1.1.5 version OMPI does - # NOT support anything older than v1.2.5 - AC_MSG_CHECKING([PMIx version]) - AS_IF([test "$pmix_ext_install_incdir" != "/usr" && test "$pmix_ext_install_incdir" != "/usr/include"], - [CPPFLAGS="-I$pmix_ext_install_incdir $CPPFLAGS"]) - AS_IF([test "$pmix_ext_install_libdir" != "/usr" && test "$pmix_ext_install_libdir" != "/usr/include"], - [LDFLAGS="-L$pmix_ext_install_libdir $LDFLAGS"]) - LIBS="$LIBS -lpmix" - - AS_IF([test "x`ls $1/include/pmix_version.h 2> /dev/null`" = "x"], - [AC_MSG_RESULT([version file not found - assuming v1.1.4]) - opal_external_pmix_version_found=1 - opal_external_pmix_happy=no - opal_external_pmix_version=internal], - [AC_MSG_RESULT([version file found]) - opal_external_pmix_version_found=0]) - - # if it does exist, then we need 
to parse it to find - # the actual release series - AS_IF([test "$opal_external_pmix_version_found" = "0"], - [AC_MSG_CHECKING([version 4x]) - AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ - #include - #if (PMIX_VERSION_MAJOR < 4L) - #error "not version 4 or above" - #endif - ], [])], - [AC_MSG_RESULT([found]) - opal_external_pmix_version=4x - opal_numerical_pmix_version=4 - opal_external_pmix_version_found=1 - opal_external_pmix_happy=yes], - [AC_MSG_RESULT([not found])])]) - - AS_IF([test "$opal_external_pmix_version_found" = "0"], - [AC_MSG_CHECKING([version 3x or above]) - AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ - #include - #if (PMIX_VERSION_MAJOR != 3L) - #error "not version 3" - #endif - ], [])], - [AC_MSG_RESULT([found]) - opal_external_pmix_version=3x - opal_numerical_pmix_version=3 - opal_external_pmix_version_found=1 - opal_external_pmix_happy=yes], - [AC_MSG_RESULT([not found])])]) - - AS_IF([test "$opal_external_pmix_version_found" = "0"], - [AC_MSG_CHECKING([version 2x]) - AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ - #include - #if (PMIX_VERSION_MAJOR != 2L) - #error "not version 2" - #endif - ], [])], - [AC_MSG_RESULT([found]) - opal_external_pmix_version=2x - opal_numerical_pmix_version=2 - opal_external_pmix_version_found=1 - opal_external_pmix_happy=yes], - [AC_MSG_RESULT([not found])])]) - - AS_IF([test "$opal_external_pmix_version_found" = "0"], - [AC_MSG_CHECKING([version 1x]) - AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ - #include - #if (PMIX_VERSION_MAJOR != 1L && PMIX_VERSION_MINOR != 2L) - #error "not version 1.2.x" - #endif - ], [])], - [AC_MSG_RESULT([found]) - opal_external_pmix_version=1x - opal_numerical_pmix_version=1 - opal_external_pmix_version_found=1 - opal_external_have_pmix1=1 - opal_external_pmix_happy=yes], - [AC_MSG_RESULT([not found])])]) - - AS_IF([test "x$opal_external_pmix_version" = "x"], - [AC_MSG_WARN([External PMIx support detected, but version]) - AC_MSG_WARN([information of the external lib could not]) - AC_MSG_WARN([be detected]) - opal_external_pmix_happy=no]) - - ]) - AS_IF([test "$opal_external_pmix_happy" = "yes" && test $opal_numerical_pmix_version -lt 3], - [AC_MSG_WARN([OMPI no longer supports PMIx versions prior to v3]) - AC_MSG_WARN([Please direct us to a more current PMIx release or]) - AC_MSG_WARN([use the internally provided one]) - AC_MSG_ERROR([Cannot continue])]) - - AS_IF([test "$opal_external_pmix_happy" = "yes"], - [$3 - # add the new flags to our wrapper compilers - AS_IF([test "$pmix_ext_install_incdir" != "/usr" && test "$pmix_ext_install_incdir" != "/usr/include"], - [pmix_external_WRAPPER_EXTRA_CPPFLAGS="-I$pmix_ext_install_incdir"]) - AS_IF([test "$pmix_ext_install_libdir" != "/usr" && test "$pmix_ext_install_libdir" != "/usr/include"], - [pmix_external_WRAPPER_EXTRA_LDFLAGS="-L$pmix_ext_install_libdir" - pmix_external_WRAPPER_EXTRA_LIBS="-lpmix"])], - [$4]) - -dnl swap back in original LDFLAGS, LIBS to avoid messing up subsequent configury checks -dnl don't swap back in orig CFLAGS as there are lots of places where the external pmix -dnl header file location needs to be known - LDFLAGS=$opal_external_pmix_save_LDFLAGS - LIBS=$opal_external_pmix_save_LIBS - - OPAL_VAR_SCOPE_POP -]) - - -AC_DEFUN([OPAL_CHECK_PMIX],[ - - AC_ARG_WITH([pmix], - [AC_HELP_STRING([--with-pmix(=DIR)], - [Build PMIx support. DIR can take one of three values: "internal", "external", or a valid directory name. "internal" (or no DIR value) forces Open MPI to use its internal copy of PMIx. "external" forces Open MPI to use an external installation of PMIx. 
Supplying a valid directory name also forces Open MPI to use an external installation of PMIx, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries. Note that Open MPI does not support --without-pmix.])]) - - AC_ARG_WITH([pmix-libdir], - [AC_HELP_STRING([--with-pmix-libdir=DIR], - [Look for libpmix in the given directory DIR, DIR/lib or DIR/lib64])]) - - AS_IF([test "$with_pmix" = "no"], - [AC_MSG_WARN([Open MPI requires PMIx support. It can be built]) - AC_MSG_WARN([with either its own internal copy of PMIx, or with]) - AC_MSG_WARN([an external copy that you supply.]) - AC_MSG_ERROR([Cannot continue])]) - - opal_external_have_pmix1=0 - AC_MSG_CHECKING([if user requested internal PMIx support($with_pmix)]) - opal_external_pmix_happy=no - pmix_ext_install_libdir= - pmix_ext_install_dir= - - AS_IF([test "$with_pmix" = "internal"], - [AC_MSG_RESULT([yes]) - opal_external_pmix_happy=no - opal_external_pmix_version=internal - opal_enable_pmix=yes], - - [AC_MSG_RESULT([no]) - # check for external pmix lib */ - AS_IF([test -z "$with_pmix" || test "$with_pmix" = "yes" || test "$with_pmix" = "external"], - [pmix_ext_install_dir=/usr], - [pmix_ext_install_dir=$with_pmix]) - AS_IF([test -n "$with_pmix_libdir"], - [pmix_ext_install_libdir=$with_pmix_libdir]) - OPAL_CHECK_PMIX_LIB([$pmix_ext_install_dir], - [$pmix_ext_install_libdir], - [opal_external_pmix_happy=yes - opal_enable_pmix=yes], - [opal_external_pmix_happy=no])]) - - # Final check - if they explicitly pointed us at an external - # installation that wasn't acceptable, then error out - AS_IF([test -n "$with_pmix" && test "$with_pmix" != "yes" && test "$with_pmix" != "external" && test "$with_pmix" != "internal" && test "$opal_external_pmix_happy" = "no"], - [AC_MSG_WARN([External PMIx support requested, but either the version]) - AC_MSG_WARN([of the external lib was not supported or the required]) - AC_MSG_WARN([header/library files were not found]) - AC_MSG_ERROR([Cannot continue])]) - - # Final check - if they didn't point us explicitly at an external version - # but we found one anyway, use the internal version if it is higher - AS_IF([test "$opal_external_pmix_version" != "internal" && (test -z "$with_pmix" || test "$with_pmix" = "yes")], - [AS_IF([test "$opal_external_pmix_version" != "4x"], - [AC_MSG_WARN([discovered external PMIx version is less than internal version 4.x]) - AC_MSG_WARN([using internal PMIx]) - opal_external_pmix_version=internal - opal_external_pmix_happy=no])]) - - AC_MSG_CHECKING([PMIx version to be used]) - AS_IF([test "$opal_external_pmix_happy" = "yes"], - [AC_MSG_RESULT([external($opal_external_pmix_version)]) - AS_IF([test "$pmix_ext_install_dir" != "/usr"], - [opal_external_pmix_CPPFLAGS="-I$pmix_ext_install_dir/include" - opal_external_pmix_LDFLAGS=-L$pmix_ext_install_libdir]) - opal_external_pmix_LIBS="-lpmix"], - [AC_MSG_RESULT([internal])]) - - AC_DEFINE_UNQUOTED([OPAL_PMIX_V1],[$opal_external_have_pmix1], - [Whether the external PMIx library is v1]) - - AC_SUBST(opal_external_pmix_LDFLAGS) - AC_SUBST(opal_external_pmix_LIBS) - - AS_IF([test "$opal_external_pmix_happy" = "yes"], - [AS_IF([test "$opal_external_pmix_version" = "1x"], - [OPAL_SUMMARY_ADD([[Miscellaneous]],[[PMIx support]], [opal_pmix], [External (1.2.5) WARNING - DYNAMIC OPS NOT SUPPORTED])], - [OPAL_SUMMARY_ADD([[Miscellaneous]],[[PMIx support]], [opal_pmix], [External ($opal_external_pmix_version)])])], - [OPAL_SUMMARY_ADD([[Miscellaneous]], [[PMIx support]], [opal_pmix], [Internal])]) -]) diff --git 
a/config/opal_config_pmix.m4 b/config/opal_config_pmix.m4 index 9460139e275..d77f98d59d2 100644 --- a/config/opal_config_pmix.m4 +++ b/config/opal_config_pmix.m4 @@ -14,7 +14,7 @@ dnl Copyright (c) 2009-2019 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2014-2020 Intel, Inc. All rights reserved. -dnl Copyright (c) 2014-2018 Research Organization for Information Science +dnl Copyright (c) 2014-2021 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl Copyright (c) 2016 IBM Corporation. All rights reserved. dnl Copyright (c) 2020 Triad National Security, LLC. All rights @@ -104,7 +104,7 @@ AC_DEFUN([OPAL_CONFIG_PMIX], [ # unless internal specifically requested by the user, try to find # an external that works. external_pmix_happy=0 - AS_IF([test "opal_pmix_mode" != "internal"], + AS_IF([test "$opal_pmix_mode" != "internal"], [_OPAL_CONFIG_PMIX_EXTERNAL( [external_pmix_happy=1 opal_pmix_mode="external"], diff --git a/configure.ac b/configure.ac index fc4e18fb7f7..594b9fd786f 100644 --- a/configure.ac +++ b/configure.ac @@ -30,6 +30,7 @@ # reserved. # Copyright (c) 2020 Amazon.com, Inc. or its affiliates. # All Rights reserved. +# Copyright (c) 2021 Nanook Consulting All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -295,6 +296,8 @@ OPAL_CHECK_CUDA m4_ifdef([project_ompi], [OMPI_CONFIGURE_OPTIONS]) m4_ifdef([project_oshmem], [OSHMEM_CONFIGURE_OPTIONS]) +OMPI_PMIX_ADD_ARGS +OMPI_PRRTE_ADD_ARGS # Set up project specific AM_CONDITIONALs AS_IF([test "$enable_ompi" != "no"], [project_ompi_amc=true], [project_ompi_amc=false]) diff --git a/ompi/errhandler/errhandler.c b/ompi/errhandler/errhandler.c index 8beaf8a3139..28d925dbf37 100644 --- a/ompi/errhandler/errhandler.c +++ b/ompi/errhandler/errhandler.c @@ -367,8 +367,6 @@ static void ompi_errhandler_construct(ompi_errhandler_t *new_errhandler) new_errhandler->eh_file_fn = NULL; new_errhandler->eh_fort_fn = NULL; - new_errhandler->eh_cxx_dispatch_fn = NULL; - memset (new_errhandler->eh_name, 0, MPI_MAX_OBJECT_NAME); } diff --git a/ompi/errhandler/errhandler.h b/ompi/errhandler/errhandler.h index a41446159af..b00296096a7 100644 --- a/ompi/errhandler/errhandler.h +++ b/ompi/errhandler/errhandler.h @@ -71,7 +71,6 @@ typedef void (ompi_errhandler_generic_handler_fn_t)(void *, int *, ...); */ enum ompi_errhandler_lang_t { OMPI_ERRHANDLER_LANG_C, - OMPI_ERRHANDLER_LANG_CXX, OMPI_ERRHANDLER_LANG_FORTRAN }; typedef enum ompi_errhandler_lang_t ompi_errhandler_lang_t; @@ -89,17 +88,6 @@ enum ompi_errhandler_type_t { typedef enum ompi_errhandler_type_t ompi_errhandler_type_t; -/* - * Need to forward declare this for use in ompi_errhandle_cxx_dispatch_fn_t. - */ -struct ompi_errhandler_t; - -/** - * C++ invocation function signature - */ -typedef void (ompi_errhandler_cxx_dispatch_fn_t)(void *handle, int *err_code, - const char *message, ompi_errhandler_generic_handler_fn_t *fn); - /** * Back-end type for MPI_Errorhandler. */ @@ -123,14 +111,6 @@ struct ompi_errhandler_t { MPI_Win_errhandler_function *eh_win_fn; ompi_errhandler_fortran_handler_fn_t *eh_fort_fn; - /* Have separate callback for C++ errhandlers. This pointer is - initialized to NULL and will be set explicitly by the C++ - bindings for Create_errhandler. This function is invoked - when eh_lang==OMPI_ERRHANDLER_LANG_CXX so that the user's - callback function can be invoked with the right language - semantics. 
*/ - ompi_errhandler_cxx_dispatch_fn_t *eh_cxx_dispatch_fn; - /* index in Fortran <-> C translation array */ int eh_f_to_c_index; }; diff --git a/ompi/errhandler/errhandler_invoke.c b/ompi/errhandler/errhandler_invoke.c index b1ff233e821..99ea58754c3 100644 --- a/ompi/errhandler/errhandler_invoke.c +++ b/ompi/errhandler/errhandler_invoke.c @@ -48,7 +48,17 @@ int ompi_errhandler_invoke(ompi_errhandler_t *errhandler, void *mpi_object, if (state >= OMPI_MPI_STATE_INIT_COMPLETED && state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) { comm = (ompi_mpi_compat_mpi3)? &ompi_mpi_comm_world.comm: &ompi_mpi_comm_self.comm; - comm->error_handler->eh_comm_fn(&comm, &err_code, message, NULL); + switch (comm->error_handler->eh_lang) { + case OMPI_ERRHANDLER_LANG_C: + comm->error_handler->eh_comm_fn(&comm, &err_code, message, NULL); + break; + + case OMPI_ERRHANDLER_LANG_FORTRAN: + fortran_handle = OMPI_INT_2_FINT(comm->c_f_to_c_index); + comm->error_handler->eh_fort_fn(&fortran_handle, &fortran_err_code); + err_code = OMPI_FINT_2_INT(fortran_err_code); + break; + } } else { if(NULL == ompi_initial_error_handler) { @@ -74,11 +84,6 @@ int ompi_errhandler_invoke(ompi_errhandler_t *errhandler, void *mpi_object, errhandler->eh_comm_fn(&comm, &err_code, message, NULL); break; - case OMPI_ERRHANDLER_LANG_CXX: - errhandler->eh_cxx_dispatch_fn(&comm, &err_code, message, - (ompi_errhandler_generic_handler_fn_t *)errhandler->eh_comm_fn); - break; - case OMPI_ERRHANDLER_LANG_FORTRAN: fortran_handle = OMPI_INT_2_FINT(comm->c_f_to_c_index); errhandler->eh_fort_fn(&fortran_handle, &fortran_err_code); @@ -94,11 +99,6 @@ int ompi_errhandler_invoke(ompi_errhandler_t *errhandler, void *mpi_object, errhandler->eh_win_fn(&win, &err_code, message, NULL); break; - case OMPI_ERRHANDLER_LANG_CXX: - errhandler->eh_cxx_dispatch_fn(&win, &err_code, message, - (ompi_errhandler_generic_handler_fn_t *)errhandler->eh_win_fn); - break; - case OMPI_ERRHANDLER_LANG_FORTRAN: fortran_handle = OMPI_INT_2_FINT(win->w_f_to_c_index); errhandler->eh_fort_fn(&fortran_handle, &fortran_err_code); @@ -114,11 +114,6 @@ int ompi_errhandler_invoke(ompi_errhandler_t *errhandler, void *mpi_object, errhandler->eh_file_fn(&file, &err_code, message, NULL); break; - case OMPI_ERRHANDLER_LANG_CXX: - errhandler->eh_cxx_dispatch_fn(&file, &err_code, message, - (ompi_errhandler_generic_handler_fn_t *)errhandler->eh_file_fn); - break; - case OMPI_ERRHANDLER_LANG_FORTRAN: fortran_handle = OMPI_INT_2_FINT(file->f_f_to_c_index); errhandler->eh_fort_fn(&fortran_handle, &fortran_err_code); diff --git a/ompi/mca/coll/base/base.h b/ompi/mca/coll/base/base.h index 9fc431f1ff2..9a216fe48a1 100644 --- a/ompi/mca/coll/base/base.h +++ b/ompi/mca/coll/base/base.h @@ -77,7 +77,7 @@ int mca_coll_base_find_available(bool enable_progress_threads, * @return OMPI_ERROR Upon failure. * * Note that the types of the parameters have "struct" in them - * (e.g., ompi_communicator_t" vs. a plain "ompi_communicator_t") to + * (e.g., "struct ompi_communicator_t" vs. a plain "ompi_communicator_t") to * avoid an include file loop. All similar types (e.g., "struct * ompi_communicator_t *", "ompi_communicator_t *", and "MPI_Comm") * are all typedef'ed to be the same, so the fact that we use struct @@ -93,7 +93,7 @@ int mca_coll_base_find_available(bool enable_progress_threads, * invoking this function. 
Specifically: this function is called in * the depths of communicator creation, but during the execution of * this function, new communicators may be created, and therefore - * communicator creation functions may be re-entered (albiet with + * communicator creation functions may be re-entered (albeit with * different arguments). */ int mca_coll_base_comm_select(struct ompi_communicator_t *comm); diff --git a/ompi/mca/coll/base/coll_base_functions.h b/ompi/mca/coll/base/coll_base_functions.h index 11b46ba47eb..23745819032 100644 --- a/ompi/mca/coll/base/coll_base_functions.h +++ b/ompi/mca/coll/base/coll_base_functions.h @@ -398,7 +398,7 @@ do { \ } while (0) /** - * This macro give a generic way to compute the best count of + * This macro gives a generic way to compute the best count of * the segment (i.e. the number of complete datatypes that * can fit in the specified SEGSIZE). Beware, when this macro * is called, the SEGCOUNT should be initialized to the count as @@ -415,7 +415,7 @@ do { \ } \ /** - * This macro gives a generic wait to compute the well distributed block counts + * This macro gives a generic way to compute the well distributed block counts * when the count and number of blocks are fixed. * Macro returns "early-block" count, "late-block" count, and "split-index" * which is the block at which we switch from "early-block" count to @@ -518,7 +518,7 @@ static inline void ompi_coll_base_free_reqs(ompi_request_t **reqs, int count) /** * Return the array of requests on the data. If the array was not initialized - * or if it's size was too small, allocate it to fit the requested size. + * or if its size was too small, allocate it to fit the requested size. */ ompi_request_t** ompi_coll_base_comm_get_reqs(mca_coll_base_comm_t* data, int nreqs); diff --git a/ompi/mca/coll/coll.h b/ompi/mca/coll/coll.h index 57e4af4ac02..c95a3b1596f 100644 --- a/ompi/mca/coll/coll.h +++ b/ompi/mca/coll/coll.h @@ -39,13 +39,13 @@ * * Component selection is done per commuicator, at Communicator * construction time. mca_coll_base_comm_select() is used to - * create the list of components available to the compoenent + * create the list of components available to the componenent * collm_comm_query function, instantiating a module for each - * component that i usable, and sets the module collective function pointers. + * component that is usable, and sets the module collective function pointers. * mca_coll_base_comm_select() then loops through the list of available * components (via the instantiated module), and uses the * module's coll_module_enable() function to enable the modules, and - * if successful, sets the communicator collective functions to the + * if successful, sets the communicator collective functions to * those supplied by the given module, keeping track of which module it * is associated with. * diff --git a/ompi/mca/common/ompio/common_ompio.h b/ompi/mca/common/ompio/common_ompio.h index 33dd0dd4023..1f2bbc585d7 100644 --- a/ompi/mca/common/ompio/common_ompio.h +++ b/ompi/mca/common/ompio/common_ompio.h @@ -67,6 +67,7 @@ #define OMPIO_LOCK_NEVER 0x00000100 #define OMPIO_LOCK_NOT_THIS_OP 0x00000200 #define OMPIO_DATAREP_NATIVE 0x00000400 +#define OMPIO_COLLECTIVE_OP 0x00000800 #define OMPIO_ROOT 0 diff --git a/ompi/mca/fbtl/posix/fbtl_posix.h b/ompi/mca/fbtl/posix/fbtl_posix.h index b9f1c1149ee..e01101f1dca 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix.h +++ b/ompi/mca/fbtl/posix/fbtl_posix.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2018 University of Houston. All rights reserved. + * Copyright (c) 2008-2020 University of Houston. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -29,6 +29,11 @@ #include "ompi/mca/common/ompio/common_ompio_request.h" extern int mca_fbtl_posix_priority; +extern bool mca_fbtl_posix_read_datasieving; +extern bool mca_fbtl_posix_write_datasieving; +extern size_t mca_fbtl_posix_max_block_size; +extern size_t mca_fbtl_posix_max_gap_size; +extern size_t mca_fbtl_posix_max_tmpbuf_size; BEGIN_C_DECLS diff --git a/ompi/mca/fbtl/posix/fbtl_posix_component.c b/ompi/mca/fbtl/posix/fbtl_posix_component.c index 8575c2cad40..32f1d41e1a2 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_component.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_component.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2018 University of Houston. All rights reserved. + * Copyright (c) 2008-2020 University of Houston. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ @@ -37,6 +37,15 @@ const char *mca_fbtl_posix_component_version_string = "OMPI/MPI posix FBTL MCA component version " OMPI_VERSION; int mca_fbtl_posix_priority = 10; +bool mca_fbtl_posix_read_datasieving = true; +bool mca_fbtl_posix_write_datasieving = true; +size_t mca_fbtl_posix_max_block_size = 1048576; // 1MB +size_t mca_fbtl_posix_max_gap_size = 4096; // Size of a block in many linux fs +size_t mca_fbtl_posix_max_tmpbuf_size = 67108864; // 64 MB +/* + * Private functions + */ +static int register_component(void); /* * Instantiate the public struct with all of our public information @@ -54,6 +63,7 @@ mca_fbtl_base_component_2_0_0_t mca_fbtl_posix_component = { .mca_component_name = "posix", MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, OMPI_RELEASE_VERSION), + .mca_register_component_params = register_component, }, .fbtlm_data = { /* This component is checkpointable */ @@ -63,3 +73,62 @@ mca_fbtl_base_component_2_0_0_t mca_fbtl_posix_component = { .fbtlm_file_query = mca_fbtl_posix_component_file_query, /* get priority and actions */ .fbtlm_file_unquery = mca_fbtl_posix_component_file_unquery, /* undo what was done by previous function */ }; + +static int register_component(void) +{ + mca_fbtl_posix_priority = 10; + (void) mca_base_component_var_register(&mca_fbtl_posix_component.fbtlm_version, + "priority", "Priority of the fbtl posix component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_fbtl_posix_priority); + + mca_fbtl_posix_max_block_size = 1048576; + (void) mca_base_component_var_register(&mca_fbtl_posix_component.fbtlm_version, + "max_block_size", "Maximum average size in bytes of a data block in an iovec for data sieving. " + "An average block size larger than this parameter will disable data sieving. Default: 1048576 bytes.", + MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_fbtl_posix_max_block_size ); + + mca_fbtl_posix_max_gap_size = 4096; + (void) mca_base_component_var_register(&mca_fbtl_posix_component.fbtlm_version, + "max_gap_size", "Maximum average gap size between two blocks in an iovec for data sieving. 
" + "An average gap size larger than this parameter will disable data sieving. Default: 4096 bytes. " , + MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_fbtl_posix_max_gap_size ); + + mca_fbtl_posix_max_tmpbuf_size = 67108864; + (void) mca_base_component_var_register(&mca_fbtl_posix_component.fbtlm_version, + "max_tmpbuf_size", "Maximum size of the temporary buffer used for data sieving in bytes. " + "Default: 67108864 (64MB). " , + MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_fbtl_posix_max_tmpbuf_size ); + + mca_fbtl_posix_read_datasieving = true; + (void) mca_base_component_var_register(&mca_fbtl_posix_component.fbtlm_version, + "read_datasieving", "Parameter indicating whether to perform data sieving for read operations. " + "Default: true.", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_fbtl_posix_read_datasieving ); + + mca_fbtl_posix_write_datasieving = true; + (void) mca_base_component_var_register(&mca_fbtl_posix_component.fbtlm_version, + "write_datasieving", "Parameter indicating whether to perform data sieving for write operations. " + "Default: true.", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_fbtl_posix_write_datasieving ); + + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fbtl/posix/fbtl_posix_ipreadv.c b/ompi/mca/fbtl/posix/fbtl_posix_ipreadv.c index 6b5476b584b..81e4ddf050a 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_ipreadv.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_ipreadv.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2008-2021 University of Houston. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -33,6 +33,8 @@ #include "ompi/constants.h" #include "ompi/mca/fbtl/fbtl.h" +#define MAX_ATTEMPTS 10 + ssize_t mca_fbtl_posix_ipreadv (ompio_file_t *fh, ompi_request_t *request) { @@ -44,7 +46,7 @@ ssize_t mca_fbtl_posix_ipreadv (ompio_file_t *fh, data = (mca_fbtl_posix_request_data_t *) malloc ( sizeof (mca_fbtl_posix_request_data_t)); if ( NULL == data ) { - opal_output (1,"could not allocate memory\n"); + opal_output (1,"mca_fbtl_posix_ipreadv: could not allocate memory\n"); return 0; } @@ -56,14 +58,14 @@ ssize_t mca_fbtl_posix_ipreadv (ompio_file_t *fh, data->aio_reqs = (struct aiocb *) malloc (sizeof(struct aiocb) * fh->f_num_of_io_entries); if (NULL == data->aio_reqs) { - opal_output(1, "OUT OF MEMORY\n"); + opal_output(1, "mca_fbtl_posix_ipreadv: could not allocate memory\n"); free(data); return 0; } data->aio_req_status = (int *) malloc (sizeof(int) * fh->f_num_of_io_entries); if (NULL == data->aio_req_status) { - opal_output(1, "OUT OF MEMORY\n"); + opal_output(1, "mca_fbtl_posix_ipreadv: could not allocate memory\n"); free(data->aio_reqs); free(data); return 0; @@ -103,14 +105,22 @@ ssize_t mca_fbtl_posix_ipreadv (ompio_file_t *fh, } for (i=0; i < data->aio_last_active_req; i++) { - if (-1 == aio_read(&data->aio_reqs[i])) { - opal_output(1, "mca_fbtl_posix_ipreadv: error in aio_read(): %s", strerror(errno)); - mca_fbtl_posix_unlock ( &data->aio_lock, data->aio_fh ); - free(data->aio_reqs); - free(data->aio_req_status); - free(data); - return OMPI_ERROR; - } + int counter=0; + while ( MAX_ATTEMPTS > counter ) { + if ( -1 != aio_read(&data->aio_reqs[i]) ) { + break; + } + counter++; + mca_common_ompio_progress(); + } + if ( MAX_ATTEMPTS == counter ) { + opal_output(1, "mca_fbtl_posix_ipreadv: error in aio_read(): errno %d %s", errno, strerror(errno)); + mca_fbtl_posix_unlock ( &data->aio_lock, data->aio_fh ); + free(data->aio_reqs); + free(data->aio_req_status); + free(data); + return OMPI_ERROR; + } } req->req_data = data; diff --git a/ompi/mca/fbtl/posix/fbtl_posix_ipwritev.c b/ompi/mca/fbtl/posix/fbtl_posix_ipwritev.c index 0cabdc14085..6338f88b0a1 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_ipwritev.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_ipwritev.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2008-2021 University of Houston. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -32,6 +32,8 @@ #include "ompi/constants.h" #include "ompi/mca/fbtl/fbtl.h" +#define MAX_ATTEMPTS 10 + ssize_t mca_fbtl_posix_ipwritev (ompio_file_t *fh, ompi_request_t *request) { @@ -43,7 +45,7 @@ ssize_t mca_fbtl_posix_ipwritev (ompio_file_t *fh, data = (mca_fbtl_posix_request_data_t *) malloc ( sizeof (mca_fbtl_posix_request_data_t)); if ( NULL == data ) { - opal_output (1,"could not allocate memory\n"); + opal_output (1,"mca_fbtl_posix_ipwritev: could not allocate memory\n"); return 0; } @@ -55,14 +57,14 @@ ssize_t mca_fbtl_posix_ipwritev (ompio_file_t *fh, data->aio_reqs = (struct aiocb *) malloc (sizeof(struct aiocb) * fh->f_num_of_io_entries); if (NULL == data->aio_reqs) { - opal_output(1, "OUT OF MEMORY\n"); + opal_output (1,"mca_fbtl_posix_ipwritev: could not allocate memory\n"); free(data); return 0; } data->aio_req_status = (int *) malloc (sizeof(int) * fh->f_num_of_io_entries); if (NULL == data->aio_req_status) { - opal_output(1, "OUT OF MEMORY\n"); + opal_output (1,"mca_fbtl_posix_ipwritev: could not allocate memory\n"); free(data->aio_reqs); free(data); return 0; @@ -102,7 +104,15 @@ ssize_t mca_fbtl_posix_ipwritev (ompio_file_t *fh, } for (i=0; i < data->aio_last_active_req; i++) { - if (-1 == aio_write(&data->aio_reqs[i])) { + int counter=0; + while ( MAX_ATTEMPTS > counter ) { + if (-1 != aio_write(&data->aio_reqs[i])) { + break; + } + counter++; + mca_common_ompio_progress(); + } + if ( MAX_ATTEMPTS == counter ) { opal_output(1, "mca_fbtl_posix_ipwritev: error in aio_write(): %s", strerror(errno)); mca_fbtl_posix_unlock ( &data->aio_lock, data->aio_fh ); free(data->aio_req_status); diff --git a/ompi/mca/fbtl/posix/fbtl_posix_preadv.c b/ompi/mca/fbtl/posix/fbtl_posix_preadv.c index f8a031a9264..89a819a6e23 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_preadv.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_preadv.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2017 University of Houston. All rights reserved. + * Copyright (c) 2008-2020 University of Houston. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -28,100 +28,260 @@ #include "ompi/constants.h" #include "ompi/mca/fbtl/fbtl.h" + +static ssize_t mca_fbtl_posix_preadv_datasieving (ompio_file_t *fh); +static ssize_t mca_fbtl_posix_preadv_generic (ompio_file_t *fh); + ssize_t mca_fbtl_posix_preadv (ompio_file_t *fh ) { - /*int *fp = NULL;*/ - int i, block=1, ret; - struct iovec *iov = NULL; - int iov_count = 0; - OMPI_MPI_OFFSET_TYPE iov_offset = 0; ssize_t bytes_read=0, ret_code=0; struct flock lock; - off_t total_length, end_offset=0; + int ret; if (NULL == fh->f_io_array) { return OMPI_ERROR; } + + if ( fh->f_num_of_io_entries > 1 ) { + bool do_data_sieving = true; + + size_t avg_gap_size=0; + size_t avg_block_size = 0; + off_t prev_offset = (off_t)fh->f_io_array[0].offset; + int i; + for ( i=0; i< fh->f_num_of_io_entries; i++ ) { + avg_block_size += fh->f_io_array[i].length; + avg_gap_size += (size_t)((off_t)fh->f_io_array[i].offset - prev_offset); + prev_offset = (off_t)fh->f_io_array[i].offset; + } + avg_block_size = avg_block_size / fh->f_num_of_io_entries; + avg_gap_size = avg_gap_size / fh->f_num_of_io_entries; + + if ( false == mca_fbtl_posix_read_datasieving || + 0 == avg_gap_size || + avg_block_size > mca_fbtl_posix_max_block_size || + avg_gap_size > mca_fbtl_posix_max_gap_size ) { + do_data_sieving = false; + } - iov = (struct iovec *) malloc - (OMPIO_IOVEC_INITIAL_SIZE * sizeof (struct iovec)); + if ( do_data_sieving) { + return mca_fbtl_posix_preadv_datasieving (fh); + } + else { + return mca_fbtl_posix_preadv_generic (fh); + } + } + else { + // i.e. fh->f_num_of_io_entries == 1 + ret = mca_fbtl_posix_lock ( &lock, fh, F_RDLCK, (off_t)fh->f_io_array[0].offset, + (off_t)fh->f_io_array[0].length, OMPIO_LOCK_ENTIRE_REGION ); + if ( 0 < ret ) { + opal_output(1, "mca_fbtl_posix_preadv: error in mca_fbtl_posix_lock() ret=%d: %s", + ret, strerror(errno)); + /* Just in case some part of the lock worked */ + mca_fbtl_posix_unlock ( &lock, fh); + return OMPI_ERROR; + } + + ret_code = pread(fh->fd, fh->f_io_array[0].memory_address, fh->f_io_array[0].length, + (off_t)fh->f_io_array[0].offset ); + mca_fbtl_posix_unlock ( &lock, fh ); + if ( ret_code == -1 ) { + opal_output(1, "mca_fbtl_posix_preadv: error in (p)read(v):%s", strerror(errno)); + return OMPI_ERROR; + } + + bytes_read += ret_code; + } + + return bytes_read; +} + +ssize_t mca_fbtl_posix_preadv_datasieving (ompio_file_t *fh) +{ + size_t start, end, len; + size_t bufsize = 0; + int ret, i, j; + ssize_t bytes_read=0, ret_code=0; + struct flock lock; + char *temp_buf = NULL; + + int startindex = 0; + int endindex = 0; + bool done = false; + + while (!done) { + // Break the io_array into chunks such that the size of the temporary + // buffer does not exceed mca_fbtl_posix_max_tmpbuf_size bytes. + // Each iteration will thus work in the range (startindex, endindex[ + startindex = endindex; + if ( startindex >= fh->f_num_of_io_entries ) { + done = true; + break; + } + + size_t sstart = (size_t)fh->f_io_array[startindex].offset; + size_t slen=0; + + for ( j = startindex; j < fh->f_num_of_io_entries; j++ ) { + endindex = j; + slen = ((size_t)fh->f_io_array[j].offset + fh->f_io_array[j].length) - sstart; + if (slen > mca_fbtl_posix_max_tmpbuf_size ) { + endindex = j-1; + break; + } + } + // Need to increment the value of endindex + // by one for the loop syntax to work correctly. 
+ endindex++; + + start = (size_t)fh->f_io_array[startindex].offset; + end = (size_t)fh->f_io_array[endindex-1].offset + fh->f_io_array[endindex-1].length; + len = end - start; + + if ( len > bufsize ) { + if ( NULL != temp_buf ) { + free ( temp_buf); + } + temp_buf = (char *) malloc ( len ); + if ( NULL == temp_buf ) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + bufsize = len; + } + + // Read the entire block. + ret = mca_fbtl_posix_lock ( &lock, fh, F_RDLCK, start, len, OMPIO_LOCK_ENTIRE_REGION ); + if ( 0 < ret ) { + opal_output(1, "mca_fbtl_posix_preadv_datasieving: error in mca_fbtl_posix_lock() ret=%d: %s", + ret, strerror(errno)); + /* Just in case some part of the lock worked */ + mca_fbtl_posix_unlock ( &lock, fh); + free ( temp_buf); + return OMPI_ERROR; + } + + ret_code = pread (fh->fd, temp_buf, len, start); + mca_fbtl_posix_unlock ( &lock, fh); + if ( ret_code == -1 ) { + opal_output(1, "mca_fbtl_posix_preadv_datasieving: error in (p)read(v):%s", strerror(errno)); + free ( temp_buf); + return OMPI_ERROR; + } + + // Copy out the elements that were requested. + size_t pos = 0; + size_t num_bytes; + size_t start_offset = (size_t) fh->f_io_array[startindex].offset; + for ( i = startindex ; i < endindex ; i++) { + pos = (size_t) fh->f_io_array[i].offset - start_offset; + if ( (ssize_t) pos > ret_code ) { + break; + } + num_bytes = fh->f_io_array[i].length; + if ( ((ssize_t) pos + (ssize_t)num_bytes) > ret_code ) { + num_bytes = ret_code - (ssize_t)pos; + } + + memcpy (fh->f_io_array[i].memory_address, temp_buf + pos, num_bytes); + bytes_read += num_bytes; + } + } + + free ( temp_buf); + return bytes_read; +} + +ssize_t mca_fbtl_posix_preadv_generic (ompio_file_t *fh ) +{ + ssize_t bytes_read=0, ret_code=0; + struct iovec *iov = NULL; + struct flock lock; + int ret, i; + + int block=1; + int iov_count = 0; + OMPI_MPI_OFFSET_TYPE iov_offset = 0; + off_t total_length, end_offset=0; + + iov = (struct iovec *) malloc (OMPIO_IOVEC_INITIAL_SIZE * sizeof (struct iovec)); if (NULL == iov) { opal_output(1, "OUT OF MEMORY\n"); return OMPI_ERR_OUT_OF_RESOURCE; } - + for (i=0 ; if_num_of_io_entries ; i++) { - if (0 == iov_count) { - iov[iov_count].iov_base = fh->f_io_array[i].memory_address; - iov[iov_count].iov_len = fh->f_io_array[i].length; - iov_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset; + if (0 == iov_count) { + iov[iov_count].iov_base = fh->f_io_array[i].memory_address; + iov[iov_count].iov_len = fh->f_io_array[i].length; + iov_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset; end_offset = (off_t)fh->f_io_array[i].offset + (off_t)fh->f_io_array[i].length; - iov_count ++; - } - - if (OMPIO_IOVEC_INITIAL_SIZE*block <= iov_count) { - block ++; - iov = (struct iovec *)realloc - (iov, OMPIO_IOVEC_INITIAL_SIZE * block * - sizeof(struct iovec)); - if (NULL == iov) { - opal_output(1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - } - - if (fh->f_num_of_io_entries != i+1) { - if (((((OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset + - (ptrdiff_t)fh->f_io_array[i].length) == - (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i+1].offset)) && - (iov_count < IOV_MAX ) ){ - iov[iov_count].iov_base = - fh->f_io_array[i+1].memory_address; - iov[iov_count].iov_len = fh->f_io_array[i+1].length; - end_offset = (off_t)fh->f_io_array[i].offset + (off_t)fh->f_io_array[i].length; - iov_count ++; - continue; - } - } - + iov_count ++; + } + + if (OMPIO_IOVEC_INITIAL_SIZE*block <= iov_count) { + block ++; + iov = 
(struct iovec *)realloc + (iov, OMPIO_IOVEC_INITIAL_SIZE * block * + sizeof(struct iovec)); + if (NULL == iov) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + if (fh->f_num_of_io_entries != i+1) { + if (((((OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset + + (ptrdiff_t)fh->f_io_array[i].length) == + (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i+1].offset)) && + (iov_count < IOV_MAX ) ){ + iov[iov_count].iov_base = + fh->f_io_array[i+1].memory_address; + iov[iov_count].iov_len = fh->f_io_array[i+1].length; + end_offset = (off_t)fh->f_io_array[i].offset + (off_t)fh->f_io_array[i].length; + iov_count ++; + continue; + } + } + total_length = (end_offset - (off_t)iov_offset ); - + ret = mca_fbtl_posix_lock ( &lock, fh, F_RDLCK, iov_offset, total_length, OMPIO_LOCK_SELECTIVE ); if ( 0 < ret ) { - opal_output(1, "mca_fbtl_posix_preadv: error in mca_fbtl_posix_lock() ret=%d: %s", ret, strerror(errno)); + opal_output(1, "mca_fbtl_posix_preadv_generic: error in mca_fbtl_posix_lock() ret=%d: %s", ret, strerror(errno)); free (iov); /* Just in case some part of the lock worked */ mca_fbtl_posix_unlock ( &lock, fh); return OMPI_ERROR; } #if defined(HAVE_PREADV) - ret_code = preadv (fh->fd, iov, iov_count, iov_offset); + ret_code = preadv (fh->fd, iov, iov_count, iov_offset); #else - if (-1 == lseek (fh->fd, iov_offset, SEEK_SET)) { - opal_output(1, "mca_fbtl_posix_preadv: error in lseek:%s", strerror(errno)); + if (-1 == lseek (fh->fd, iov_offset, SEEK_SET)) { + opal_output(1, "mca_fbtl_posix_preadv_generic: error in lseek:%s", strerror(errno)); free(iov); mca_fbtl_posix_unlock ( &lock, fh ); - return OMPI_ERROR; - } - ret_code = readv (fh->fd, iov, iov_count); + return OMPI_ERROR; + } + ret_code = readv (fh->fd, iov, iov_count); #endif mca_fbtl_posix_unlock ( &lock, fh ); - if ( 0 < ret_code ) { - bytes_read+=ret_code; - } - else if ( ret_code == -1 ) { - opal_output(1, "mca_fbtl_posix_preadv: error in (p)readv:%s", strerror(errno)); + if ( 0 < ret_code ) { + bytes_read+=ret_code; + } + else if ( ret_code == -1 ) { + opal_output(1, "mca_fbtl_posix_preadv_generic: error in (p)readv:%s", strerror(errno)); free(iov); - return OMPI_ERROR; - } - else if ( 0 == ret_code ){ - /* end of file reached, no point in continue reading; */ - break; - } - iov_count = 0; - } + return OMPI_ERROR; + } + else if ( 0 == ret_code ){ + /* end of file reached, no point in continue reading; */ + break; + } + iov_count = 0; + } free (iov); - return bytes_read; } diff --git a/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c b/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c index 7ad6e6d9d2e..d54e9e09434 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2017 University of Houston. All rights reserved. + * Copyright (c) 2008-2020 University of Houston. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -30,7 +30,187 @@ #include "ompi/constants.h" #include "ompi/mca/fbtl/fbtl.h" +static ssize_t mca_fbtl_posix_pwritev_datasieving (ompio_file_t *fh ); +static ssize_t mca_fbtl_posix_pwritev_generic (ompio_file_t *fh ); + ssize_t mca_fbtl_posix_pwritev(ompio_file_t *fh ) +{ + ssize_t bytes_written=0, ret_code=0; + struct flock lock; + int ret; + + if (NULL == fh->f_io_array) { + return OMPI_ERROR; + } + + if ( fh->f_num_of_io_entries > 1 ) { + bool do_data_sieving = true; + + size_t avg_gap_size=0; + size_t avg_block_size = 0; + off_t prev_offset = (off_t)fh->f_io_array[0].offset; + int i; + for ( i=0; i< fh->f_num_of_io_entries; i++ ) { + avg_block_size += fh->f_io_array[i].length; + avg_gap_size += (size_t)((off_t)fh->f_io_array[i].offset - prev_offset); + prev_offset = (off_t)fh->f_io_array[i].offset; + } + avg_block_size = avg_block_size / fh->f_num_of_io_entries; + avg_gap_size = avg_gap_size / fh->f_num_of_io_entries; + + if ( false == mca_fbtl_posix_write_datasieving || + 0 == avg_gap_size || + avg_block_size > mca_fbtl_posix_max_block_size || + avg_gap_size > mca_fbtl_posix_max_gap_size || + ompi_mpi_thread_multiple || + !(fh->f_flags & OMPIO_COLLECTIVE_OP) ) { + do_data_sieving = false; + } + + if ( do_data_sieving) { + return mca_fbtl_posix_pwritev_datasieving (fh); + } + else { + return mca_fbtl_posix_pwritev_generic (fh); + } + } + else { + // i.e. fh->f_num_of_io_entries == 1 + ret = mca_fbtl_posix_lock ( &lock, fh, F_WRLCK, (off_t)fh->f_io_array[0].offset, + (off_t)fh->f_io_array[0].length, OMPIO_LOCK_ENTIRE_REGION ); + if ( 0 < ret ) { + opal_output(1, "mca_fbtl_posix_pwritev: error in mca_fbtl_posix_lock() ret=%d: %s", + ret, strerror(errno)); + /* Just in case some part of the lock worked */ + mca_fbtl_posix_unlock ( &lock, fh); + return OMPI_ERROR; + } + + ret_code = pwrite(fh->fd, fh->f_io_array[0].memory_address, fh->f_io_array[0].length, + (off_t)fh->f_io_array[0].offset ); + mca_fbtl_posix_unlock ( &lock, fh ); + if ( ret_code == -1 ) { + opal_output(1, "mca_fbtl_posix_pwritev: error in (p)write(v):%s", strerror(errno)); + return OMPI_ERROR; + } + + bytes_written += ret_code; + } + + return bytes_written; +} + +ssize_t mca_fbtl_posix_pwritev_datasieving (ompio_file_t *fh) +{ + size_t start, end, len; + size_t bufsize = 0; + int ret, i, j; + ssize_t bytes_written=0, ret_code=0; + struct flock lock; + char *temp_buf = NULL; + + int startindex = 0; + int endindex = 0; + bool done = false; + + while (!done) { + // Break the io_array into chunks such that the size of the temporary + // buffer does not exceed mca_fbtl_posix_max_tmpbuf_size bytes. + // Each iteration will thus work in the range (startindex, endindex[ + startindex = endindex; + if ( startindex >= fh->f_num_of_io_entries ) { + done = true; + break; + } + + size_t sstart = (size_t)fh->f_io_array[startindex].offset; + size_t slen=0; + + for ( j = startindex; j < fh->f_num_of_io_entries; j++ ) { + endindex = j; + slen = ((size_t)fh->f_io_array[j].offset + fh->f_io_array[j].length) - sstart; + if (slen > mca_fbtl_posix_max_tmpbuf_size ) { + endindex = j-1; + break; + } + } + // Need to increment the value of endindex + // by one for the loop syntax to work correctly. 
+ endindex++; + + start = (size_t)fh->f_io_array[startindex].offset; + end = (size_t)fh->f_io_array[endindex-1].offset + fh->f_io_array[endindex-1].length; + len = end - start; + + if ( len > bufsize ) { + if ( NULL != temp_buf ) { + free ( temp_buf); + } + temp_buf = (char *) malloc ( len ); + if ( NULL == temp_buf ) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + bufsize = len; + } + + // Read the entire block. + ret = mca_fbtl_posix_lock ( &lock, fh, F_WRLCK, start, len, OMPIO_LOCK_ENTIRE_REGION ); + if ( 0 < ret ) { + opal_output(1, "mca_fbtl_posix_pwritev_datasieving: error in mca_fbtl_posix_lock() ret=%d: %s", + ret, strerror(errno)); + /* Just in case some part of the lock worked */ + mca_fbtl_posix_unlock ( &lock, fh); + free ( temp_buf); + return OMPI_ERROR; + } + + ret_code = pread (fh->fd, temp_buf, len, start); + if ( ret_code == -1 ) { + //opal_output(1, "mca_fbtl_posix_pwritev_datasieving: error in pwrite:%s", strerror(errno)); + opal_output(1, "mca_fbtl_posix_pwritev_datasieving: error in pwrite:%s", strerror(errno)); + /* Just in case some part of the lock worked */ + mca_fbtl_posix_unlock ( &lock, fh); + free ( temp_buf); + return OMPI_ERROR; + } + + // Copy out the elements to write into temporary buffer. + size_t pos = 0; + size_t num_bytes; + size_t start_offset = (size_t) fh->f_io_array[startindex].offset; + for ( i = startindex ; i < endindex ; i++) { + pos = (size_t) fh->f_io_array[i].offset - start_offset; + num_bytes = fh->f_io_array[i].length; + memcpy (temp_buf + pos, fh->f_io_array[i].memory_address, num_bytes); + bytes_written += num_bytes; + } + ret_code = pwrite (fh->fd, temp_buf, len, start); + if ( ret_code == -1 ) { + opal_output(1, "mca_fbtl_posix_pwritev_datasieving: error in pwrite:%s", strerror(errno)); + /* Just in case some part of the lock worked */ + mca_fbtl_posix_unlock ( &lock, fh); + free ( temp_buf); + return OMPI_ERROR; + } + + mca_fbtl_posix_unlock ( &lock, fh); + if ( ret_code == -1 ) { + opal_output(1, "mca_fbtl_posix_pwritev_datasieving: error in pwrite:%s", strerror(errno)); + /* Just in case some part of the lock worked */ + mca_fbtl_posix_unlock ( &lock, fh); + free ( temp_buf); + return OMPI_ERROR; + } + + } + + free ( temp_buf); + return bytes_written; +} + + +ssize_t mca_fbtl_posix_pwritev_generic (ompio_file_t *fh ) { /*int *fp = NULL;*/ int i, block = 1, ret; diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2.h b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2.h index bd46710c456..10317ca1801 100644 --- a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2.h +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2008-2020 University of Houston. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -36,7 +36,6 @@ BEGIN_C_DECLS extern int mca_fcoll_dynamic_gen2_priority; extern int mca_fcoll_dynamic_gen2_num_groups; -extern int mca_fcoll_dynamic_gen2_write_chunksize; OMPI_MODULE_DECLSPEC extern mca_fcoll_base_component_2_0_0_t mca_fcoll_dynamic_gen2_component; diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_component.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_component.c index 055b6b244b2..688a70138a1 100644 --- a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_component.c +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_component.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2008-2020 University of Houston. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ @@ -42,7 +42,6 @@ const char *mca_fcoll_dynamic_gen2_component_version_string = */ int mca_fcoll_dynamic_gen2_priority = 10; int mca_fcoll_dynamic_gen2_num_groups = 1; -int mca_fcoll_dynamic_gen2_write_chunksize = -1; /* * Local function @@ -95,12 +94,5 @@ dynamic_gen2_register(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_fcoll_dynamic_gen2_num_groups); - mca_fcoll_dynamic_gen2_write_chunksize = -1; - (void) mca_base_component_var_register(&mca_fcoll_dynamic_gen2_component.fcollm_version, - "write_chunksize", "Chunk size written at once. Default: stripe_size of the file system", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &mca_fcoll_dynamic_gen2_write_chunksize); - return OMPI_SUCCESS; } diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c index 97631409909..d39e94201f7 100644 --- a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2008-2020 University of Houston. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
@@ -92,7 +92,7 @@ typedef struct mca_io_ompio_aggregator_data { static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_io_ompio_aggregator_data *data, ompi_request_t **reqs ); -static int write_init (ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator_data *aggr_data, int write_chunksize ); +static int write_init (ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator_data *aggr_data ); int mca_fcoll_dynamic_gen2_break_file_view ( struct iovec *decoded_iov, int iov_count, struct iovec *local_iov_array, int local_count, @@ -111,8 +111,7 @@ static int local_heap_sort (mca_io_ompio_local_io_array *io_array, int *sorted); int mca_fcoll_dynamic_gen2_split_iov_array ( ompio_file_t *fh, mca_common_ompio_io_array_t *work_array, - int num_entries, int *last_array_pos, int *last_pos_in_field, - int chunk_size ); + int num_entries, int *last_array_pos, int *last_pos_in_field ); int mca_fcoll_dynamic_gen2_file_write_all (ompio_file_t *fh, @@ -145,7 +144,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (ompio_file_t *fh, MPI_Aint *broken_total_lengths=NULL; int *aggregators=NULL; - int write_chunksize, *result_counts=NULL; + int *result_counts=NULL; #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN @@ -199,15 +198,9 @@ int mca_fcoll_dynamic_gen2_file_write_all (ompio_file_t *fh, if ( fh->f_stripe_size == 0 ) { // EDGAR: just a quick heck for testing + //fh->f_stripe_size = 1048576; fh->f_stripe_size = 65536; } - if ( -1 == mca_fcoll_dynamic_gen2_write_chunksize ) { - write_chunksize = fh->f_stripe_size; - } - else { - write_chunksize = mca_fcoll_dynamic_gen2_write_chunksize; - } - ret = mca_fcoll_dynamic_gen2_get_configuration (fh, &dynamic_gen2_num_io_procs, &aggregators); if (OMPI_SUCCESS != ret){ @@ -608,7 +601,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_write_time = MPI_Wtime(); #endif - ret = write_init (fh, aggregators[i], aggr_data[i], write_chunksize ); + ret = write_init (fh, aggregators[i], aggr_data[i] ); if (OMPI_SUCCESS != ret){ goto exit; } @@ -637,7 +630,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_write_time = MPI_Wtime(); #endif - ret = write_init (fh, aggregators[i], aggr_data[i], write_chunksize ); + ret = write_init (fh, aggregators[i], aggr_data[i] ); if (OMPI_SUCCESS != ret){ goto exit; } @@ -735,7 +728,7 @@ exit : } -static int write_init (ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator_data *aggr_data, int write_chunksize ) +static int write_init (ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator_data *aggr_data ) { int ret=OMPI_SUCCESS; int last_array_pos=0; @@ -743,18 +736,36 @@ static int write_init (ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator if ( aggregator == fh->f_rank && aggr_data->prev_num_io_entries) { - while ( aggr_data->prev_bytes_to_write > 0 ) { + fh->f_flags |= OMPIO_COLLECTIVE_OP; + while ( aggr_data->prev_bytes_to_write > 0 ) { + ssize_t tret; aggr_data->prev_bytes_to_write -= mca_fcoll_dynamic_gen2_split_iov_array (fh, aggr_data->prev_io_array, aggr_data->prev_num_io_entries, - &last_array_pos, &last_pos, - write_chunksize ); - if ( 0 > fh->f_fbtl->fbtl_pwritev (fh)) { + &last_array_pos, &last_pos ); + tret = fh->f_fbtl->fbtl_pwritev (fh); + if ( 0 > tret ) { free ( aggr_data->prev_io_array); opal_output (1, "dynamic_gen2_write_all: fbtl_pwritev failed\n"); ret = OMPI_ERROR; goto exit; } + +#if DEBUG_ON + printf("fh->f_num_of_io_entries=%d\n", fh->f_num_of_io_entries); + printf("[%d]: 
fh->f_io_array[0].offset = %ld .size = %ld\n", fh->f_rank, (long)fh->f_io_array[0].offset, + fh->f_io_array[0].length); + if ( fh->f_num_of_io_entries > 1 ) + printf("[%d]: fh->f_io_array[1].offset = %ld .size = %ld\n", fh->f_rank, (long)fh->f_io_array[1].offset, + fh->f_io_array[1].length); + + + int n = fh->f_num_of_io_entries-1; + if ( fh->f_num_of_io_entries > 2 ) + printf("[%d]: fh->f_io_array[n].offset = %ld .size = %ld\n", fh->f_rank, (long)fh->f_io_array[n].offset, + fh->f_io_array[n].length); +#endif } + fh->f_flags &= ~OMPIO_COLLECTIVE_OP; free ( fh->f_io_array ); free ( aggr_data->prev_io_array); } @@ -1595,14 +1606,15 @@ int mca_fcoll_dynamic_gen2_get_configuration (ompio_file_t *fh, int *dynamic_gen int mca_fcoll_dynamic_gen2_split_iov_array ( ompio_file_t *fh, mca_common_ompio_io_array_t *io_array, int num_entries, - int *ret_array_pos, int *ret_pos, int chunk_size ) + int *ret_array_pos, int *ret_pos ) { int array_pos = *ret_array_pos; int pos = *ret_pos; size_t bytes_written = 0; - size_t bytes_to_write = chunk_size; - + off_t baseaddr = ((off_t)io_array[array_pos].offset + pos) - (((off_t)io_array[array_pos].offset + pos) % (off_t)fh->f_stripe_size); + off_t endaddr = baseaddr + fh->f_stripe_size; + if ( 0 == array_pos && 0 == pos ) { fh->f_io_array = (mca_common_ompio_io_array_t *) malloc ( num_entries * sizeof(mca_common_ompio_io_array_t)); if ( NULL == fh->f_io_array ){ @@ -1612,32 +1624,28 @@ int mca_fcoll_dynamic_gen2_split_iov_array ( ompio_file_t *fh, mca_common_ompio_ } int i=0; - while (bytes_to_write > 0 ) { - fh->f_io_array[i].memory_address = &(((char *)io_array[array_pos].memory_address)[pos]); - fh->f_io_array[i].offset = &(((char *)io_array[array_pos].offset)[pos]); + do { + fh->f_io_array[i].memory_address = (char *)io_array[array_pos].memory_address + pos; + fh->f_io_array[i].offset = (char *)io_array[array_pos].offset + pos; - if ( (io_array[array_pos].length - pos ) >= bytes_to_write ) { - fh->f_io_array[i].length = bytes_to_write; + off_t length = io_array[array_pos].length - pos; + + if ( ( (off_t)fh->f_io_array[i].offset + length) < endaddr ) { + fh->f_io_array[i].length = length; } else { - fh->f_io_array[i].length = io_array[array_pos].length - pos; + fh->f_io_array[i].length = endaddr - (size_t)fh->f_io_array[i].offset; } - + pos += fh->f_io_array[i].length; bytes_written += fh->f_io_array[i].length; - bytes_to_write-= fh->f_io_array[i].length; i++; if ( pos == (int)io_array[array_pos].length ) { pos = 0; - if ((array_pos + 1) < num_entries) { - array_pos++; - } - else { - break; - } + array_pos++; } - } + } while ( (array_pos < num_entries) && (((off_t)io_array[array_pos].offset+pos ) < endaddr) ); fh->f_num_of_io_entries = i; *ret_array_pos = array_pos; diff --git a/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c b/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c index abf281a0a3f..8b2ccaa183d 100644 --- a/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c +++ b/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c @@ -771,7 +771,9 @@ static int write_init (ompio_file_t *fh, } } else { + fh->f_flags |= OMPIO_COLLECTIVE_OP; ret_temp = fh->f_fbtl->fbtl_pwritev(fh); + fh->f_flags &= ~OMPIO_COLLECTIVE_OP; if(0 > ret_temp) { opal_output (1, "vulcan_write_all: fbtl_pwritev failed\n"); ret = ret_temp; diff --git a/ompi/mca/fs/lustre/fs_lustre_file_open.c b/ompi/mca/fs/lustre/fs_lustre_file_open.c index 7d8540025d1..3a0e1c049b7 100644 --- a/ompi/mca/fs/lustre/fs_lustre_file_open.c +++ b/ompi/mca/fs/lustre/fs_lustre_file_open.c @@ -10,7 +10,7 @@ * 
Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2018 University of Houston. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science + * Copyright (c) 2015-2020 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -144,6 +144,7 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm, fh->f_stripe_size = lump->lmm_stripe_size; fh->f_stripe_count = lump->lmm_stripe_count; fh->f_fs_block_size = lump->lmm_stripe_size; + fh->f_flags |= OMPIO_LOCK_NEVER; return OMPI_SUCCESS; } diff --git a/ompi/mca/hook/comm_method/hook_comm_method_component.c b/ompi/mca/hook/comm_method/hook_comm_method_component.c index 025439b6045..ca27c8703be 100644 --- a/ompi/mca/hook/comm_method/hook_comm_method_component.c +++ b/ompi/mca/hook/comm_method/hook_comm_method_component.c @@ -66,14 +66,29 @@ const ompi_hook_base_component_1_0_0_t mca_hook_comm_method_component = { .hookm_mpi_finalize_bottom = NULL, }; +enum mca_hook_comm_method_mode_flags_t { + /* Display on MPI_INIT */ + OMPI_HOOK_COMM_METHOD_INIT = 0x01, + /* Display on MPI_FINALIZE */ + OMPI_HOOK_COMM_METHOD_FINALIZE = 0x02, +}; + int mca_hook_comm_method_verbose = 0; int mca_hook_comm_method_output = -1; bool mca_hook_comm_method_enable_mpi_init = false; bool mca_hook_comm_method_enable_mpi_finalize = false; +uint32_t mca_hook_comm_method_enabled_flags = 0x00; int mca_hook_comm_method_max = 12; int mca_hook_comm_method_brief = 0; char *mca_hook_comm_method_fakefile = NULL; +static mca_base_var_enum_value_flag_t mca_hook_comm_method_modes[] = { + {.flag = OMPI_HOOK_COMM_METHOD_INIT, .string = "mpi_init"}, + {.flag = OMPI_HOOK_COMM_METHOD_FINALIZE, .string = "mpi_finalize"}, + {0, NULL, 0} +}; + + static int ompi_hook_comm_method_component_open(void) { // Nothing to do @@ -88,6 +103,8 @@ static int ompi_hook_comm_method_component_close(void) static int ompi_hook_comm_method_component_register(void) { + int ret; + mca_base_var_enum_flag_t *mca_hook_comm_method_flags = NULL; /* * Component verbosity level @@ -111,53 +128,51 @@ static int ompi_hook_comm_method_component_register(void) opal_output_set_verbosity(mca_hook_comm_method_output, mca_hook_comm_method_verbose); /* - * If the component is active for mpi_init / mpi_finalize + * If the component is active for mpi_init / mpi_finalize via the MCA + * option: ompi_display_comm + * We created both a component level version of this parameter: hook_comm_method_display + * along with a OMPI project level version (ompi_display_comm) for ease of + * use to enable this feature. The user can fine tune the behavior of this + * feature using the additional component level MCA options. 
*/ mca_hook_comm_method_enable_mpi_init = false; - (void) mca_base_component_var_register(&mca_hook_comm_method_component.hookm_version, "enable_mpi_init", - "Enable comm_method behavior on mpi_init", - MCA_BASE_VAR_TYPE_BOOL, NULL, - 0, 0, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_hook_comm_method_enable_mpi_init); - mca_hook_comm_method_enable_mpi_finalize = false; - (void) mca_base_component_var_register(&mca_hook_comm_method_component.hookm_version, "enable_mpi_finalize", - "Enable comm_method behavior on mpi_finalize", - MCA_BASE_VAR_TYPE_BOOL, NULL, - 0, 0, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_hook_comm_method_enable_mpi_finalize); - - // User can set the comm_method mca variable too - int hook_comm_method = -1; - (void) mca_base_var_register("ompi", NULL, NULL, "comm_method", - "Enable comm_method behavior (1) mpi_init or (2) mpi_finalize", - MCA_BASE_VAR_TYPE_INT, NULL, - 0, 0, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_READONLY, - &hook_comm_method); - - if( 1 == hook_comm_method ) { - mca_hook_comm_method_enable_mpi_init = true; + mca_base_var_enum_create_flag("ompi_comm_method", mca_hook_comm_method_modes, &mca_hook_comm_method_flags); + + ret = mca_base_component_var_register(&mca_hook_comm_method_component.hookm_version, "display", + "Enable the communication protocol report: when MPI_INIT is invoked (using the 'mpi_init' value) and/or when MPI_FINALIZE is invoked (using the 'mpi_finalize' value).", + MCA_BASE_VAR_TYPE_UNSIGNED_INT, + &mca_hook_comm_method_flags->super, + 0, 0, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_hook_comm_method_enabled_flags); + + (void) mca_base_var_register_synonym(ret, "ompi", "ompi", NULL, "display_comm", MCA_BASE_VAR_SYN_FLAG_INTERNAL); + + OBJ_RELEASE(mca_hook_comm_method_flags); + if(OPAL_ERR_VALUE_OUT_OF_BOUNDS == ret) { + opal_output(0, "hook:comm_method: Warning invalid comm_method specified."); } - else if( 2 == hook_comm_method ) { - mca_hook_comm_method_enable_mpi_finalize = true; + else { + if( mca_hook_comm_method_enabled_flags & OMPI_HOOK_COMM_METHOD_INIT ) { + mca_hook_comm_method_enable_mpi_init = true; + } + if( mca_hook_comm_method_enabled_flags & OMPI_HOOK_COMM_METHOD_FINALIZE ) { + mca_hook_comm_method_enable_mpi_finalize = true; + } } - // comm_method_max - (void) mca_base_var_register("ompi", NULL, NULL, "comm_method_max", + // hook_comm_method_max + (void) mca_base_component_var_register(&mca_hook_comm_method_component.hookm_version, "max", "Number of hosts for which to print unabbreviated 2d table of comm methods.", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_READONLY, &mca_hook_comm_method_max); - // comm_method_brief - (void) mca_base_var_register("ompi", NULL, NULL, "comm_method_brief", + // hook_comm_method_brief + (void) mca_base_component_var_register(&mca_hook_comm_method_component.hookm_version, "brief", "Only print the comm method summary, skip the 2d table.", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -165,9 +180,9 @@ static int ompi_hook_comm_method_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_hook_comm_method_brief); - // comm_method_fakefile is just for debugging, allows complete override of all the + // hook_comm_method_fakefile is just for debugging, allows complete override of all the // comm method in the table - (void) mca_base_var_register("ompi", NULL, NULL, "comm_method_fakefile", + (void) mca_base_component_var_register(&mca_hook_comm_method_component.hookm_version, "fakefile", "For debugging only: read comm methods from a 
file", MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, diff --git a/ompi/mca/io/ompio/io_ompio_component.c b/ompi/mca/io/ompio/io_ompio_component.c index 2d82f164750..09d18aad75c 100644 --- a/ompi/mca/io/ompio/io_ompio_component.c +++ b/ompi/mca/io/ompio/io_ompio_component.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2019 University of Houston. All rights reserved. + * Copyright (c) 2008-2020 University of Houston. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015-2018 Research Organization for Information Science @@ -302,42 +302,11 @@ file_query(struct ompi_file_t *file, int *priority) { mca_common_ompio_data_t *data; - char *tmp; - int rank; - int is_lustre=0; //false - - tmp = strchr (file->f_filename, ':'); - rank = ompi_comm_rank ( file->f_comm); - if (!tmp) { - if ( 0 == rank) { - if (LUSTRE == mca_fs_base_get_fstype(file->f_filename)) { - is_lustre = 1; //true - } - } - - file->f_comm->c_coll->coll_bcast (&is_lustre, - 1, - MPI_INT, - 0, - file->f_comm, - file->f_comm->c_coll->coll_bcast_module); - } - else { - if (!strncasecmp(file->f_filename, "lustre:", 7) ) { - is_lustre = 1; - } - } - if (is_lustre) { - *priority = 1; - } - else { - *priority = priority_param; - } + *priority = priority_param; /* Allocate a space for this module to hang private data (e.g., the OMPIO file handle) */ - data = calloc(1, sizeof(mca_common_ompio_data_t)); if (NULL == data) { return NULL; @@ -346,7 +315,6 @@ file_query(struct ompi_file_t *file, *private_data = (struct mca_io_base_file_t*) data; /* All done */ - return &mca_io_ompio_module; } diff --git a/ompi/mca/mtl/ofi/mtl_ofi_component.c b/ompi/mca/mtl/ofi/mtl_ofi_component.c index 90be7634522..908be25fbc9 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_component.c +++ b/ompi/mca/mtl/ofi/mtl_ofi_component.c @@ -626,6 +626,10 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, hints->domain_attr->threading = FI_THREAD_DOMAIN; } + if ((MTL_OFI_TAG_AUTO == ofi_tag_mode) || (MTL_OFI_TAG_FULL == ofi_tag_mode)) { + hints->domain_attr->cq_data_size = sizeof(int); + } + switch (control_progress) { case MTL_OFI_PROG_AUTO: hints->domain_attr->control_progress = FI_PROGRESS_AUTO; diff --git a/ompi/mca/op/avx/Makefile.am b/ompi/mca/op/avx/Makefile.am index 41dcf2e1834..b1d84d90b33 100644 --- a/ompi/mca/op/avx/Makefile.am +++ b/ompi/mca/op/avx/Makefile.am @@ -2,7 +2,7 @@ # Copyright (c) 2019-2020 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2020 Research Organization for Information Science +# Copyright (c) 2020-2021 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ # @@ -86,7 +86,7 @@ mcacomponentdir = $(ompilibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_op_avx_la_SOURCES = $(sources) mca_op_avx_la_LIBADD = $(specialized_op_libs) -mca_op_avx_la_LDFLAGS = -module -avoid-version +mca_op_avx_la_LDFLAGS = -module -avoid-version $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la # Specific information for static builds. 
diff --git a/ompi/mca/op/avx/configure.m4 b/ompi/mca/op/avx/configure.m4 index 09d8b374c8e..72490f5cc7d 100644 --- a/ompi/mca/op/avx/configure.m4 +++ b/ompi/mca/op/avx/configure.m4 @@ -29,6 +29,13 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ op_avx_support=0 op_avx2_support=0 op_avx512_support=0 + + AS_VAR_PUSHDEF([op_avx_check_sse3], [ompi_cv_op_avx_check_sse3]) + AS_VAR_PUSHDEF([op_avx_check_sse41], [ompi_cv_op_avx_check_sse41]) + AS_VAR_PUSHDEF([op_avx_check_avx], [ompi_cv_op_avx_check_avx]) + AS_VAR_PUSHDEF([op_avx_check_avx2], [ompi_cv_op_avx_check_avx2]) + AS_VAR_PUSHDEF([op_avx_check_avx512], [ompi_cv_op_avx_check_avx512]) + OPAL_VAR_SCOPE_PUSH([op_avx_cflags_save]) AS_IF([test "$opal_cv_asm_arch" = "X86_64"], @@ -37,213 +44,258 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ # # Check for AVX512 support # - AC_MSG_CHECKING([for AVX512 support (no additional flags)]) - AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[#include ]], - [[ - __m512 vA, vB; - _mm512_add_ps(vA, vB) - ]])], - [op_avx512_support=1 - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])]) - - AS_IF([test $op_avx512_support -eq 0], - [AC_MSG_CHECKING([for AVX512 support (with -march=skylake-avx512)]) - op_avx_cflags_save="$CFLAGS" - CFLAGS="$CFLAGS -march=skylake-avx512" + AC_CACHE_CHECK([for AVX512 support], op_avx_check_avx512, AS_VAR_SET(op_avx_check_avx512, yes)) + AS_IF([test "$op_avx_check_avx512" = "yes"], + [AC_MSG_CHECKING([for AVX512 support (no additional flags)]) AC_LINK_IFELSE( [AC_LANG_PROGRAM([[#include ]], [[ +#if defined(__ICC) && !defined(__AVX512F__) +#error "icc needs the -m flags to provide the AVX* detection macros +#endif __m512 vA, vB; _mm512_add_ps(vA, vB) ]])], [op_avx512_support=1 - MCA_BUILD_OP_AVX512_FLAGS="-march=skylake-avx512" AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no])]) - CFLAGS="$op_avx_cflags_save" - ]) - # - # Some combination of gcc and older as would not correctly build the code generated by - # _mm256_loadu_si256. Screen them out. - # - AS_IF([test $op_avx512_support -eq 1], - [AC_MSG_CHECKING([if _mm512_loadu_si512 generates code that can be compiled]) - op_avx_cflags_save="$CFLAGS" - CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX512_FLAGS" - AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[#include ]], - [[ + + AS_IF([test $op_avx512_support -eq 0], + [AC_MSG_CHECKING([for AVX512 support (with -mavx512f -mavx512bw -mavx512vl -mavx512dq)]) + op_avx_cflags_save="$CFLAGS" + CFLAGS="-mavx512f -mavx512bw -mavx512vl -mavx512dq $CFLAGS" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[ +#if defined(__ICC) && !defined(__AVX512F__) +#error "icc needs the -m flags to provide the AVX* detection macros +#endif + __m512 vA, vB; + _mm512_add_ps(vA, vB) + ]])], + [op_avx512_support=1 + MCA_BUILD_OP_AVX512_FLAGS="-mavx512f -mavx512bw -mavx512vl -mavx512dq" + AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no])]) + CFLAGS="$op_avx_cflags_save" + ]) + # + # Some combination of gcc and older as would not correctly build the code generated by + # _mm256_loadu_si256. Screen them out. 
+ # + AS_IF([test $op_avx512_support -eq 1], + [AC_MSG_CHECKING([if _mm512_loadu_si512 generates code that can be compiled]) + op_avx_cflags_save="$CFLAGS" + CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX512_FLAGS" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[ +#if defined(__ICC) && !defined(__AVX512F__) +#error "icc needs the -m flags to provide the AVX* detection macros +#endif int A[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; __m512i vA = _mm512_loadu_si512((__m512i*)&(A[1])) - ]])], - [AC_MSG_RESULT([yes])], - [op_avx512_support=0 - MCA_BUILD_OP_AVX512_FLAGS="" - AC_MSG_RESULT([no])]) - CFLAGS="$op_avx_cflags_save" - ]) - # - # Some PGI compilers do not define _mm512_mullo_epi64. Screen them out. - # - AS_IF([test $op_avx512_support -eq 1], - [AC_MSG_CHECKING([if _mm512_mullo_epi64 generates code that can be compiled]) - op_avx_cflags_save="$CFLAGS" - CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX512_FLAGS" - AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[#include ]], - [[ + ]])], + [AC_MSG_RESULT([yes])], + [op_avx512_support=0 + MCA_BUILD_OP_AVX512_FLAGS="" + AC_MSG_RESULT([no])]) + CFLAGS="$op_avx_cflags_save" + ]) + # + # Some PGI compilers do not define _mm512_mullo_epi64. Screen them out. + # + AS_IF([test $op_avx512_support -eq 1], + [AC_MSG_CHECKING([if _mm512_mullo_epi64 generates code that can be compiled]) + op_avx_cflags_save="$CFLAGS" + CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX512_FLAGS" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[ +#if defined(__ICC) && !defined(__AVX512F__) +#error "icc needs the -m flags to provide the AVX* detection macros +#endif __m512i vA, vB; _mm512_mullo_epi64(vA, vB) - ]])], - [AC_MSG_RESULT([yes])], - [op_avx512_support=0 - MCA_BUILD_OP_AVX512_FLAGS="" - AC_MSG_RESULT([no])]) - CFLAGS="$op_avx_cflags_save" - ]) + ]])], + [AC_MSG_RESULT([yes])], + [op_avx512_support=0 + MCA_BUILD_OP_AVX512_FLAGS="" + AC_MSG_RESULT([no])]) + CFLAGS="$op_avx_cflags_save" + ])]) # # Check support for AVX2 # - AC_MSG_CHECKING([for AVX2 support (no additional flags)]) - AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[#include ]], - [[ - __m256 vA, vB; - _mm256_add_ps(vA, vB) - ]])], - [op_avx2_support=1 - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])]) - AS_IF([test $op_avx2_support -eq 0], - [AC_MSG_CHECKING([for AVX2 support (with -mavx2)]) - op_avx_cflags_save="$CFLAGS" - CFLAGS="$CFLAGS -mavx2" - AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[#include ]], - [[ - __m256 vA, vB; - _mm256_add_ps(vA, vB) - ]])], - [op_avx2_support=1 - MCA_BUILD_OP_AVX2_FLAGS="-mavx2" - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])]) - CFLAGS="$op_avx_cflags_save" - ]) - # - # Some combination of gcc and older as would not correctly build the code generated by - # _mm256_loadu_si256. Screen them out. 
- # - AS_IF([test $op_avx2_support -eq 1], - [AC_MSG_CHECKING([if _mm256_loadu_si256 generates code that can be compiled]) - op_avx_cflags_save="$CFLAGS" - CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX2_FLAGS" + AC_CACHE_CHECK([for AVX2 support], op_avx_check_avx2, AS_VAR_SET(op_avx_check_avx2, yes)) + AS_IF([test "$op_avx_check_avx2" = "yes"], + [AC_MSG_CHECKING([for AVX2 support (no additional flags)]) AC_LINK_IFELSE( [AC_LANG_PROGRAM([[#include ]], - [[ + [[ +#if defined(__ICC) && !defined(__AVX2__) +#error "icc needs the -m flags to provide the AVX* detection macros +#endif + __m256i vA, vB, vC; + vC = _mm256_and_si256(vA, vB) + ]])], + [op_avx2_support=1 + AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no])]) + AS_IF([test $op_avx2_support -eq 0], + [AC_MSG_CHECKING([for AVX2 support (with -mavx2)]) + op_avx_cflags_save="$CFLAGS" + CFLAGS="-mavx2 $CFLAGS" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[ +#if defined(__ICC) && !defined(__AVX2__) +#error "icc needs the -m flags to provide the AVX* detection macros +#endif + __m256i vA, vB, vC; + vC = _mm256_and_si256(vA, vB) + ]])], + [op_avx2_support=1 + MCA_BUILD_OP_AVX2_FLAGS="-mavx2" + AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no])]) + CFLAGS="$op_avx_cflags_save" + ]) + # + # Some combination of gcc and older as would not correctly build the code generated by + # _mm256_loadu_si256. Screen them out. + # + AS_IF([test $op_avx2_support -eq 1], + [AC_MSG_CHECKING([if _mm256_loadu_si256 generates code that can be compiled]) + op_avx_cflags_save="$CFLAGS" + CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX2_FLAGS" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[ +#if defined(__ICC) && !defined(__AVX2__) +#error "icc needs the -m flags to provide the AVX* detection macros +#endif int A[8] = {0, 1, 2, 3, 4, 5, 6, 7}; __m256i vA = _mm256_loadu_si256((__m256i*)&A) - ]])], - [AC_MSG_RESULT([yes])], - [op_avx2_support=0 - MCA_BUILD_OP_AVX2_FLAGS="" - AC_MSG_RESULT([no])]) - CFLAGS="$op_avx_cflags_save" - ]) + ]])], + [AC_MSG_RESULT([yes])], + [op_avx2_support=0 + MCA_BUILD_OP_AVX2_FLAGS="" + AC_MSG_RESULT([no])]) + CFLAGS="$op_avx_cflags_save" + ])]) # - # What about early AVX support. The rest of the logic is slightly different as + # What about early AVX support? The rest of the logic is slightly different as # we need to include some of the SSE4.1 and SSE3 instructions. So, we first check # if we can compile AVX code without a flag, then we validate that we have support # for the SSE4.1 and SSE3 instructions we need. If not, we check for the usage of # the AVX flag, and then recheck if we have support for the SSE4.1 and SSE3 # instructions. 
# - AC_MSG_CHECKING([for AVX support (no additional flags)]) - AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[#include ]], - [[ - __m128 vA, vB; - _mm_add_ps(vA, vB) - ]])], - [op_avx_support=1 - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])]) + AC_CACHE_CHECK([for AVX support], op_avx_check_avx, AS_VAR_SET(op_avx_check_avx, yes)) + AS_IF([test "$op_avx_check_avx" = "yes"], + [AC_MSG_CHECKING([for AVX support (no additional flags)]) + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[ +#if defined(__ICC) && !defined(__AVX__) +#error "icc needs the -m flags to provide the AVX* detection macros +#endif + __m256 vA, vB, vC; + vC = _mm256_add_ps(vA, vB) + ]])], + [op_avx_support=1 + AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no])])]) # # Check for SSE4.1 support # - AS_IF([test $op_avx_support -eq 1], - [AC_MSG_CHECKING([for SSE4.1 support]) - AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[#include ]], - [[ + AC_CACHE_CHECK([for SSE4.1 support], op_avx_check_sse41, AS_VAR_SET(op_avx_check_sse41, yes)) + AS_IF([test $op_avx_support -eq 1 && test "$op_avx_check_sse41" = "yes"], + [AC_MSG_CHECKING([for SSE4.1 support]) + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[ +#if defined(__ICC) && !defined(__SSE4_1__) +#error "icc needs the -m flags to provide the AVX* detection macros +#endif __m128i vA, vB; (void)_mm_max_epi8(vA, vB) - ]])], - [op_sse41_support=1 - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])]) - ]) + ]])], + [op_sse41_support=1 + AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no])]) + ]) # # Check for SSE3 support # - AS_IF([test $op_avx_support -eq 1], - [AC_MSG_CHECKING([for SSE3 support]) - AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[#include ]], - [[ + AC_CACHE_CHECK([for SSE3 support], op_avx_check_sse3, AS_VAR_SET(op_avx_check_sse3, yes)) + AS_IF([test $op_avx_support -eq 1 && test "$op_avx_check_sse3" = "yes"], + [AC_MSG_CHECKING([for SSE3 support]) + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[ +#if defined(__ICC) && !defined(__SSE3__) +#error "icc needs the -m flags to provide the AVX* detection macros +#endif int A[4] = {0, 1, 2, 3}; __m128i vA = _mm_lddqu_si128((__m128i*)&A) - ]])], - [op_sse3_support=1 - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])]) - ]) + ]])], + [op_sse3_support=1 + AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no])]) + ]) # Second pass, do we need to add the AVX flag ? 
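Every rewritten probe above now embeds a guard of the form '#if defined(__ICC) && !defined(__AVX512F__)' (or __AVX2__, __AVX__, __SSE4_1__, __SSE3__): icc has historically accepted AVX intrinsics even when the matching -m flag is absent, so without the guard configure could report support that the final compile flags do not really enable. A standalone version of the AVX512 probe body (assuming the usual <immintrin.h> include; the vectors are initialized only to keep the example warning-free) is roughly:

```c
/* avx512_probe.c -- same shape as the configure-time link test above. */
#include <immintrin.h>

#if defined(__ICC) && !defined(__AVX512F__)
#error "icc needs the -m flags to provide the AVX* detection macros"
#endif

int main(void)
{
    __m512 vA = _mm512_set1_ps(1.0f);
    __m512 vB = _mm512_set1_ps(2.0f);
    __m512 vC = _mm512_add_ps(vA, vB);
    (void)vC;
    return 0;
}
```

With gcc or clang this only builds when -mavx512f (and the related -m flags) is in CFLAGS, which is exactly the distinction the two-step no-flag / with-flag check above relies on.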
AS_IF([test $op_avx_support -eq 0 || test $op_sse41_support -eq 0 || test $op_sse3_support -eq 0], - [AC_MSG_CHECKING([for AVX support (with -mavx)]) - op_avx_cflags_save="$CFLAGS" - CFLAGS="$CFLAGS -mavx" - AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[#include ]], - [[ - __m128 vA, vB; - _mm_add_ps(vA, vB) + [AS_IF([test "$op_avx_check_avx" = "yes"], + [AC_MSG_CHECKING([for AVX support (with -mavx)]) + op_avx_cflags_save="$CFLAGS" + CFLAGS="-mavx $CFLAGS" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[ + __m256 vA, vB, vC; +#if defined(__ICC) && !defined(__AVX__) +#error "icc needs the -m flags to provide the AVX* detection macros +#endif + vC = _mm256_add_ps(vA, vB) ]])], - [op_avx_support=1 - MCA_BUILD_OP_AVX_FLAGS="-mavx" - op_sse41_support=0 - op_sse3_support=0 - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])]) + [op_avx_support=1 + MCA_BUILD_OP_AVX_FLAGS="-mavx" + op_sse41_support=0 + op_sse3_support=0 + AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no])])]) - AS_IF([test $op_sse41_support -eq 0], - [AC_MSG_CHECKING([for SSE4.1 support]) - AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[#include ]], - [[ + AS_IF([test "$op_avx_check_sse41" = "yes" && test $op_sse41_support -eq 0], + [AC_MSG_CHECKING([for SSE4.1 support]) + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[ +#if defined(__ICC) && !defined(__SSE4_1__) +#error "icc needs the -m flags to provide the AVX* detection macros +#endif __m128i vA, vB; (void)_mm_max_epi8(vA, vB) - ]])], - [op_sse41_support=1 - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])]) - ]) - AS_IF([test $op_sse3_support -eq 0], - [AC_MSG_CHECKING([for SSE3 support]) - AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[#include ]], + ]])], + [op_sse41_support=1 + AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no])])]) + AS_IF([test "$op_avx_check_sse3" = "yes" && test $op_sse3_support -eq 0], + [AC_MSG_CHECKING([for SSE3 support]) + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[#include ]], [[ +#if defined(__ICC) && !defined(__SSE3__) +#error "icc needs the -m flags to provide the AVX* detection macros +#endif int A[4] = {0, 1, 2, 3}; __m128i vA = _mm_lddqu_si128((__m128i*)&A) ]])], - [op_sse3_support=1 - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])]) - ]) - CFLAGS="$op_avx_cflags_save" - ]) + [op_sse3_support=1 + AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no])])]) + CFLAGS="$op_avx_cflags_save"]) AC_LANG_POP([C]) ]) @@ -276,6 +328,12 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[ AC_SUBST(MCA_BUILD_OP_AVX2_FLAGS) AC_SUBST(MCA_BUILD_OP_AVX_FLAGS) + AS_VAR_POPDEF([op_avx_check_avx512]) + AS_VAR_POPDEF([op_avx_check_avx2]) + AS_VAR_POPDEF([op_avx_check_avx]) + AS_VAR_POPDEF([op_avx_check_sse41]) + AS_VAR_POPDEF([op_avx_check_sse3]) + OPAL_VAR_SCOPE_POP # Enable this component iff we have at least the most basic form of support # for vectorial ISA diff --git a/ompi/mca/op/avx/op_avx_functions.c b/ompi/mca/op/avx/op_avx_functions.c index 95a9c9ab84e..ef3f0932906 100644 --- a/ompi/mca/op/avx/op_avx_functions.c +++ b/ompi/mca/op/avx/op_avx_functions.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 The University of Tennessee and The University + * Copyright (c) 2019-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2020 Research Organization for Information Science @@ -24,16 +24,42 @@ #include "ompi/mca/op/avx/op_avx.h" #include <immintrin.h> - +/** + * The following logic is necessary to cope with distro maintainers' desire to change the compilation + * flags after the configure step, leading to inconsistencies between what OMPI has detected and what + * code can be generated during make. If we detect that the current code generation architecture has + * been changed from our own setting and cannot generate the code we need (AVX512, AVX2), we fall back + * to lesser support (AVX512 -> AVX2, AVX2 -> AVX, AVX -> error out). + */ #if defined(GENERATE_AVX512_CODE) -#define PREPEND _avx512 -#elif defined(GENERATE_AVX2_CODE) -#define PREPEND _avx2 -#elif defined(GENERATE_AVX_CODE) -#define PREPEND _avx -#else -#error This file should not be compiled in this conditions -#endif +# if defined(__AVX512BW__) && defined(__AVX512F__) && defined(__AVX512VL__) +# define PREPEND _avx512 +# else +# undef GENERATE_AVX512_CODE +# endif /* defined(__AVX512BW__) && defined(__AVX512F__) && defined(__AVX512VL__) */ +#endif /* defined(GENERATE_AVX512_CODE) */ + +#if !defined(PREPEND) && defined(GENERATE_AVX2_CODE) +# if defined(__AVX2__) +# define PREPEND _avx2 +# else +# undef GENERATE_AVX2_CODE +# endif /* defined(__AVX2__) */ +#endif /* !defined(PREPEND) && defined(GENERATE_AVX2_CODE) */ + +#if !defined(PREPEND) && defined(GENERATE_AVX_CODE) +# if defined(__AVX__) +# define PREPEND _avx +# endif +#endif /* !defined(PREPEND) && defined(GENERATE_AVX_CODE) */ + +#if !defined(PREPEND) +# if OMPI_MCA_OP_HAVE_AVX512 || OMPI_MCA_OP_HAVE_AVX2 +# error The configure step has detected possible support for AVX512 and/or AVX2 but the compiler flags during make are too restrictive. Please disable the AVX component by adding --enable-mca-no-build=op-avx to your configure step. +# else +# error This file should not be compiled under these conditions. Please provide the config.log file to the OMPI developers. 
+# endif /* OMPI_MCA_OP_HAVE_AVX512 || OMPI_MCA_OP_HAVE_AVX2 */ +#endif /* !defined(PREPEND) */ /* * Concatenate preprocessor tokens A and B without expanding macro definitions @@ -46,6 +72,102 @@ */ #define OP_CONCAT(A, B) OP_CONCAT_NX(A, B) +/* + * grep -e "_mm[125][251][862]_.*(" avx512.c -o | sed 's/(//g' | sort | uniq + * + * https://software.intel.com/sites/landingpage/IntrinsicsGuide + * + * _mm_add_epi[8,16,32,64] SSE2 + * _mm_add_pd SSE2 + * _mm_add_ps SSE + * _mm_adds_epi[8,16] SSE2 + * _mm_adds_epu[8,16] SSE2 + * _mm_and_si128 SSE2 + * _mm_lddqu_si128 SSE3 + * _mm_loadu_pd SSE2 + * _mm_loadu_ps SSE + * _mm_max_epi8 SSE4.1 + * _mm_max_epi16 SSE2 + * _mm_max_epi32 SSE4.1 + * _mm_max_epi64 AVX512VL + AVX512F + * _mm_max_epu8 SSE2 + * _mm_max_epu[16,32] SSE4.1 + * _mm_max_epu64 AVX512VL + AVX512F + * _mm_max_pd SSE2 + * _mm_max_ps SSE + * _mm_min_epi8 SSE4.1 + * _mm_min_epi16 SSE2 + * _mm_min_epi32 SSE4.1 + * _mm_min_epi64 AVX512VL + AVX512F + * _mm_min_epu8 SSE2 + * _mm_min_epu[16,32] SSE4.1 + * _mm_min_epu64 AVX512VL + AVX512F + * _mm_min_pd SSE2 + * _mm_min_ps SSE + * _mm_mul_pd SSE2 + * _mm_mul_ps SSE + * _mm_mullo_epi16 SSE2 + * _mm_mullo_epi32 SSE4.1 + * _mm_mullo_epi64 AVX512VL + AVX512DQ + * _mm_or_si128 SSE2 + * _mm_storeu_pd SSE2 + * _mm_storeu_ps SSE + * _mm_storeu_si128 SSE2 + * _mm_xor_si128 SSE2 + * _mm256_add_epi[8,16,32,64] AVX2 + * _mm256_add_p[s,d] AVX + * _mm256_adds_epi[8,16] AVX2 + * _mm256_adds_epu[8,16] AVX2 + * _mm256_and_si256 AVX2 + * _mm256_loadu_p[s,d] AVX + * _mm256_loadu_si256 AVX + * _mm256_max_epi[8,16,32] AVX2 + * _mm256_max_epi64 AVX512VL + AVX512F + * _mm256_max_epu[8,16,32] AVX2 + * _mm256_max_epu64 AVX512VL + AVX512F + * _mm256_max_p[s,d] AVX + * _mm256_min_epi[8,16,32] AVX2 + * _mm256_min_epi64 AVX512VL + AVX512F + * _mm256_min_epu[8,16,32] AVX2 + * _mm256_min_epu64 AVX512VL + AVX512F + * _mm256_min_p[s,d] AVX + * _mm256_mul_p[s,d] AVX + * _mm256_mullo_epi[16,32] AVX2 + * _mm256_mullo_epi64 AVX512VL + AVX512DQ + * _mm256_or_si256 AVX2 + * _mm256_storeu_p[s,d] AVX + * _mm256_storeu_si256 AVX + * _mm256_xor_si256 AVX2 + * _mm512_add_epi[8,16] AVX512BW + * _mm512_add_epi[32,64] AVX512F + * _mm512_add_p[s,d] AVX512F + * _mm512_adds_epi[8,16] AVX512BW + * _mm512_adds_epu[8,16] AVX512BW + * _mm512_and_si512 AVX512F + * _mm512_cvtepi16_epi8 AVX512BW + * _mm512_cvtepi8_epi16 AVX512BW + * _mm512_loadu_p[s,d] AVX512F + * _mm512_loadu_si512 AVX512F + * _mm512_max_epi[8,16] AVX512BW + * _mm512_max_epi[32,64] AVX512F + * _mm512_max_epu[8,16] AVX512BW + * _mm512_max_epu[32,64] AVX512F + * _mm512_max_p[s,d] AVX512F + * _mm512_min_epi[8,16] AVX512BW + * _mm512_min_epi[32,64] AVX512F + * _mm512_min_epu[8,16] AVX512BW + * _mm512_min_epu[32,64] AVX512F + * _mm512_min_p[s,d] AVX512F + * _mm512_mul_p[s,d] AVX512F + * _mm512_mullo_epi16 AVX512BW + * _mm512_mullo_epi32 AVX512F + * _mm512_mullo_epi64 AVX512DQ + * _mm512_or_si512 AVX512F + * _mm512_storeu_p[s,d] AVX512F + * _mm512_storeu_si512 AVX512F + * _mm512_xor_si512 AVX512F + */ + /* * Since all the functions in this file are essentially identical, we * use a macro to substitute in names and types. 
The core operation @@ -62,13 +184,14 @@ (((_flag) & mca_op_avx_component.flags) == (_flag)) #if defined(GENERATE_AVX512_CODE) && defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) +#if __AVX512F__ #define OP_AVX_AVX512_FUNC(name, type_sign, type_size, type, op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX512F_FLAG|OMPI_OP_AVX_HAS_AVX512BW_FLAG) ) { \ int types_per_step = (512 / 8) / sizeof(type); \ for( ; left_over >= types_per_step; left_over -= types_per_step ) { \ - __m512i vecA = _mm512_loadu_si512((__m512*)in); \ + __m512i vecA = _mm512_loadu_si512((__m512*)in); \ in += types_per_step; \ - __m512i vecB = _mm512_loadu_si512((__m512*)out); \ + __m512i vecB = _mm512_loadu_si512((__m512*)out); \ __m512i res = _mm512_##op##_ep##type_sign##type_size(vecA, vecB); \ _mm512_storeu_si512((__m512*)out, res); \ out += types_per_step; \ @@ -76,10 +199,14 @@ if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX512F support needed for _mm512_loadu_si512 and _mm512_storeu_si512 +#endif /* __AVX512F__ */ +#else #define OP_AVX_AVX512_FUNC(name, type_sign, type_size, type, op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) */ #if defined(GENERATE_AVX2_CODE) && defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) +#if __AVX__ #define OP_AVX_AVX2_FUNC(name, type_sign, type_size, type, op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX2_FLAG | OMPI_OP_AVX_HAS_AVX_FLAG) ) { \ int types_per_step = (256 / 8) / sizeof(type); /* AVX2 */ \ @@ -87,30 +214,37 @@ __m256i vecA = _mm256_loadu_si256((__m256i*)in); \ in += types_per_step; \ __m256i vecB = _mm256_loadu_si256((__m256i*)out); \ - __m256i res = _mm256_##op##_ep##type_sign##type_size(vecA, vecB); \ + __m256i res = _mm256_##op##_ep##type_sign##type_size(vecA, vecB); \ _mm256_storeu_si256((__m256i*)out, res); \ out += types_per_step; \ } \ if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX support needed for _mm256_loadu_si256 and _mm256_storeu_si256 +#endif /* __AVX__ */ +#else #define OP_AVX_AVX2_FUNC(name, type_sign, type_size, type, op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) */ #if defined(GENERATE_SSE3_CODE) && defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) +#if __SSE3__ #define OP_AVX_SSE4_1_FUNC(name, type_sign, type_size, type, op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_SSE3_FLAG | OMPI_OP_AVX_HAS_SSE4_1_FLAG) ) { \ - int types_per_step = (128 / 8) / sizeof(type); /* AVX */ \ + int types_per_step = (128 / 8) / sizeof(type); \ for( ; left_over >= types_per_step; left_over -= types_per_step ) { \ __m128i vecA = _mm_lddqu_si128((__m128i*)in); \ in += types_per_step; \ __m128i vecB = _mm_lddqu_si128((__m128i*)out); \ - __m128i res = _mm_##op##_ep##type_sign##type_size(vecA, vecB); \ + __m128i res = _mm_##op##_ep##type_sign##type_size(vecA, vecB); \ _mm_storeu_si128((__m128i*)out, res); \ out += types_per_step; \ } \ } #else +#error Target architecture lacks SSE3 support needed for _mm_lddqu_si128 and _mm_storeu_si128 +#endif /* __SSE3__ */ +#else #define OP_AVX_SSE4_1_FUNC(name, type_sign, type_size, type, op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) */ @@ -143,12 +277,13 @@ static void OP_CONCAT(ompi_op_avx_2buff_##name##_##type,PREPEND)(const void *_in } #if defined(GENERATE_AVX512_CODE) && defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) +#if __AVX512BW__ && __AVX__ #define OP_AVX_AVX512_MUL(name, type_sign, type_size, type, op) \ if( 
OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX512F_FLAG | OMPI_OP_AVX_HAS_AVX512BW_FLAG) ) { \ int types_per_step = (256 / 8) / sizeof(type); \ for (; left_over >= types_per_step; left_over -= types_per_step) { \ - __m256i vecA_tmp = _mm256_loadu_si256((__m256i*)in); \ - __m256i vecB_tmp = _mm256_loadu_si256((__m256i*)out); \ + __m256i vecA_tmp = _mm256_loadu_si256((__m256i*)in); \ + __m256i vecB_tmp = _mm256_loadu_si256((__m256i*)out); \ in += types_per_step; \ __m512i vecA = _mm512_cvtepi8_epi16(vecA_tmp); \ __m512i vecB = _mm512_cvtepi8_epi16(vecB_tmp); \ @@ -160,6 +295,9 @@ static void OP_CONCAT(ompi_op_avx_2buff_##name##_##type,PREPEND)(const void *_in if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX512BW and AVX support needed for _mm256_loadu_si256, _mm256_storeu_si256 and _mm512_cvtepi8_epi16 +#endif /* __AVX512BW__ && __AVX__ */ +#else #define OP_AVX_AVX512_MUL(name, type_sign, type_size, type, op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) */ /** @@ -201,13 +339,14 @@ static void OP_CONCAT( ompi_op_avx_2buff_##name##_##type, PREPEND)(const void *_ * */ #if defined(GENERATE_AVX512_CODE) && defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) +#if __AVX512F__ #define OP_AVX_AVX512_BIT_FUNC(name, type_size, type, op) \ if( OMPI_OP_AVX_HAS_FLAGS( OMPI_OP_AVX_HAS_AVX512F_FLAG) ) { \ types_per_step = (512 / 8) / sizeof(type); \ for (; left_over >= types_per_step; left_over -= types_per_step) { \ - __m512i vecA = _mm512_loadu_si512((__m512i*)in); \ + __m512i vecA = _mm512_loadu_si512((__m512i*)in); \ in += types_per_step; \ - __m512i vecB = _mm512_loadu_si512((__m512i*)out); \ + __m512i vecB = _mm512_loadu_si512((__m512i*)out); \ __m512i res = _mm512_##op##_si512(vecA, vecB); \ _mm512_storeu_si512((__m512i*)out, res); \ out += types_per_step; \ @@ -215,10 +354,14 @@ static void OP_CONCAT( ompi_op_avx_2buff_##name##_##type, PREPEND)(const void *_ if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX512F support needed for _mm512_loadu_si512 and _mm512_storeu_si512 +#endif /* __AVX512F__ */ +#else #define OP_AVX_AVX512_BIT_FUNC(name, type_size, type, op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) */ #if defined(GENERATE_AVX2_CODE) && defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) +#if __AVX__ #define OP_AVX_AVX2_BIT_FUNC(name, type_size, type, op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX2_FLAG | OMPI_OP_AVX_HAS_AVX_FLAG) ) { \ types_per_step = (256 / 8) / sizeof(type); \ @@ -226,17 +369,21 @@ static void OP_CONCAT( ompi_op_avx_2buff_##name##_##type, PREPEND)(const void *_ __m256i vecA = _mm256_loadu_si256((__m256i*)in); \ in += types_per_step; \ __m256i vecB = _mm256_loadu_si256((__m256i*)out); \ - __m256i res = _mm256_##op##_si256(vecA, vecB); \ + __m256i res = _mm256_##op##_si256(vecA, vecB); \ _mm256_storeu_si256((__m256i*)out, res); \ out += types_per_step; \ } \ if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX support needed for _mm256_loadu_si256 and _mm256_storeu_si256 +#endif /* __AVX__ */ +#else #define OP_AVX_AVX2_BIT_FUNC(name, type_size, type, op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) */ #if defined(GENERATE_SSE3_CODE) && defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) +#if __SSE3__ && __SSE2__ #define OP_AVX_SSE3_BIT_FUNC(name, type_size, type, op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_SSE3_FLAG) ) { \ types_per_step = (128 / 8) / 
sizeof(type); \ @@ -244,12 +391,15 @@ static void OP_CONCAT( ompi_op_avx_2buff_##name##_##type, PREPEND)(const void *_ __m128i vecA = _mm_lddqu_si128((__m128i*)in); \ in += types_per_step; \ __m128i vecB = _mm_lddqu_si128((__m128i*)out); \ - __m128i res = _mm_##op##_si128(vecA, vecB); \ + __m128i res = _mm_##op##_si128(vecA, vecB); \ _mm_storeu_si128((__m128i*)out, res); \ out += types_per_step; \ } \ } #else +#error Target architecture lacks SSE2 and SSE3 support needed for _mm_lddqu_si128 and _mm_storeu_si128 +#endif /* __SSE3__ && __SSE2__ */ +#else #define OP_AVX_SSE3_BIT_FUNC(name, type_size, type, op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) */ @@ -282,12 +432,13 @@ static void OP_CONCAT(ompi_op_avx_2buff_##name##_##type,PREPEND)(const void *_in } #if defined(GENERATE_AVX512_CODE) && defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) +#if __AVX512F__ #define OP_AVX_AVX512_FLOAT_FUNC(op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX512F_FLAG) ) { \ types_per_step = (512 / 8) / sizeof(float); \ for (; left_over >= types_per_step; left_over -= types_per_step) { \ - __m512 vecA = _mm512_loadu_ps((__m512*)in); \ - __m512 vecB = _mm512_loadu_ps((__m512*)out); \ + __m512 vecA = _mm512_loadu_ps((__m512*)in); \ + __m512 vecB = _mm512_loadu_ps((__m512*)out); \ in += types_per_step; \ __m512 res = _mm512_##op##_ps(vecA, vecB); \ _mm512_storeu_ps((__m512*)out, res); \ @@ -296,28 +447,36 @@ static void OP_CONCAT(ompi_op_avx_2buff_##name##_##type,PREPEND)(const void *_in if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX512F support needed for _mm512_loadu_ps and _mm512_storeu_ps +#endif /* __AVX512F__ */ +#else #define OP_AVX_AVX512_FLOAT_FUNC(op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) */ #if defined(GENERATE_AVX2_CODE) && defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) +#if __AVX__ #define OP_AVX_AVX_FLOAT_FUNC(op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX_FLAG) ) { \ types_per_step = (256 / 8) / sizeof(float); \ for( ; left_over >= types_per_step; left_over -= types_per_step ) { \ - __m256 vecA = _mm256_loadu_ps(in); \ + __m256 vecA = _mm256_loadu_ps(in); \ in += types_per_step; \ - __m256 vecB = _mm256_loadu_ps(out); \ + __m256 vecB = _mm256_loadu_ps(out); \ __m256 res = _mm256_##op##_ps(vecA, vecB); \ - _mm256_storeu_ps(out, res); \ + _mm256_storeu_ps(out, res); \ out += types_per_step; \ } \ if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX support needed for _mm256_loadu_ps and _mm256_storeu_ps +#endif /* __AVX__ */ +#else #define OP_AVX_AVX_FLOAT_FUNC(op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) */ #if defined(GENERATE_AVX_CODE) && defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) +#if __SSE__ #define OP_AVX_SSE_FLOAT_FUNC(op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_SSE_FLAG) ) { \ types_per_step = (128 / 8) / sizeof(float); \ @@ -331,6 +490,9 @@ static void OP_CONCAT(ompi_op_avx_2buff_##name##_##type,PREPEND)(const void *_in } \ } #else +#error Target architecture lacks SSE support needed for _mm_loadu_ps and _mm_storeu_ps +#endif /* __SSE__ */ +#else #define OP_AVX_SSE_FLOAT_FUNC(op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) */ @@ -363,13 +525,14 @@ static void OP_CONCAT(ompi_op_avx_2buff_##op##_float,PREPEND)(const void *_in, v } #if defined(GENERATE_AVX512_CODE) && defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) +#if 
__AVX512F__ #define OP_AVX_AVX512_DOUBLE_FUNC(op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX512F_FLAG) ) { \ types_per_step = (512 / 8) / sizeof(double); \ for (; left_over >= types_per_step; left_over -= types_per_step) { \ - __m512d vecA = _mm512_loadu_pd(in); \ + __m512d vecA = _mm512_loadu_pd(in); \ in += types_per_step; \ - __m512d vecB = _mm512_loadu_pd(out); \ + __m512d vecB = _mm512_loadu_pd(out); \ __m512d res = _mm512_##op##_pd(vecA, vecB); \ _mm512_storeu_pd((out), res); \ out += types_per_step; \ @@ -377,17 +540,21 @@ static void OP_CONCAT(ompi_op_avx_2buff_##op##_float,PREPEND)(const void *_in, v if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVXF512 support needed for _mm512_loadu_pd and _mm512_storeu_pd +#endif /* __AVXF512__ */ +#else #define OP_AVX_AVX512_DOUBLE_FUNC(op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) */ #if defined(GENERATE_AVX2_CODE) && defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) +#if __AVX__ #define OP_AVX_AVX_DOUBLE_FUNC(op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX_FLAG) ) { \ types_per_step = (256 / 8) / sizeof(double); \ for( ; left_over >= types_per_step; left_over -= types_per_step ) { \ - __m256d vecA = _mm256_loadu_pd(in); \ + __m256d vecA = _mm256_loadu_pd(in); \ in += types_per_step; \ - __m256d vecB = _mm256_loadu_pd(out); \ + __m256d vecB = _mm256_loadu_pd(out); \ __m256d res = _mm256_##op##_pd(vecA, vecB); \ _mm256_storeu_pd(out, res); \ out += types_per_step; \ @@ -395,10 +562,14 @@ static void OP_CONCAT(ompi_op_avx_2buff_##op##_float,PREPEND)(const void *_in, v if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX support needed for _mm256_loadu_pd and _mm256_storeu_pd +#endif /* __AVX__ */ +#else #define OP_AVX_AVX_DOUBLE_FUNC(op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) */ #if defined(GENERATE_AVX_CODE) && defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) +#if __SSE2__ #define OP_AVX_SSE2_DOUBLE_FUNC(op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_SSE2_FLAG) ) { \ types_per_step = (128 / 8) / sizeof(double); \ @@ -412,6 +583,9 @@ static void OP_CONCAT(ompi_op_avx_2buff_##op##_float,PREPEND)(const void *_in, v } \ } #else +#error Target architecture lacks SSE2 support needed for _mm_loadu_pd and _mm_storeu_pd +#endif /* __SSE2__ */ +#else #define OP_AVX_SSE2_DOUBLE_FUNC(op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) */ @@ -580,12 +754,13 @@ static void OP_CONCAT(ompi_op_avx_2buff_##op##_double,PREPEND)(const void *_in, * routines, needed for some optimizations. 
*/ #if defined(GENERATE_AVX512_CODE) && defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) +#if __AVX512F__ #define OP_AVX_AVX512_FUNC_3(name, type_sign, type_size, type, op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX512F_FLAG|OMPI_OP_AVX_HAS_AVX512BW_FLAG) ) { \ int types_per_step = (512 / 8) / sizeof(type); \ for (; left_over >= types_per_step; left_over -= types_per_step) { \ - __m512i vecA = _mm512_loadu_si512(in1); \ - __m512i vecB = _mm512_loadu_si512(in2); \ + __m512i vecA = _mm512_loadu_si512(in1); \ + __m512i vecB = _mm512_loadu_si512(in2); \ in1 += types_per_step; \ in2 += types_per_step; \ __m512i res = _mm512_##op##_ep##type_sign##type_size(vecA, vecB); \ @@ -595,10 +770,14 @@ static void OP_CONCAT(ompi_op_avx_2buff_##op##_double,PREPEND)(const void *_in, if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX512F support needed for _mm512_loadu_si512 and _mm512_storeu_si512 +#endif /* __AVX512F__ */ +#else #define OP_AVX_AVX512_FUNC_3(name, type_sign, type_size, type, op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) */ #if defined(GENERATE_AVX2_CODE) && defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) +#if __AVX__ #define OP_AVX_AVX2_FUNC_3(name, type_sign, type_size, type, op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX2_FLAG | OMPI_OP_AVX_HAS_AVX_FLAG) ) { \ int types_per_step = (256 / 8) / sizeof(type); \ @@ -607,17 +786,21 @@ static void OP_CONCAT(ompi_op_avx_2buff_##op##_double,PREPEND)(const void *_in, __m256i vecB = _mm256_loadu_si256((__m256i*)in2); \ in1 += types_per_step; \ in2 += types_per_step; \ - __m256i res = _mm256_##op##_ep##type_sign##type_size(vecA, vecB); \ + __m256i res = _mm256_##op##_ep##type_sign##type_size(vecA, vecB); \ _mm256_storeu_si256((__m256i*)out, res); \ out += types_per_step; \ } \ if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX support needed for _mm256_loadu_si256 and _mm256_storeu_si256 +#endif /* __AVX__ */ +#else #define OP_AVX_AVX2_FUNC_3(name, type_sign, type_size, type, op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) */ #if defined(GENERATE_SSE3_CODE) && defined(OMPI_MCA_OP_HAVE_SSE41) && (1 == OMPI_MCA_OP_HAVE_SSE41) && defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) +#if __SSE3__ && __SSE2__ #define OP_AVX_SSE4_1_FUNC_3(name, type_sign, type_size, type, op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_SSE3_FLAG | OMPI_OP_AVX_HAS_SSE4_1_FLAG) ) { \ int types_per_step = (128 / 8) / sizeof(type); \ @@ -626,12 +809,15 @@ static void OP_CONCAT(ompi_op_avx_2buff_##op##_double,PREPEND)(const void *_in, __m128i vecB = _mm_lddqu_si128((__m128i*)in2); \ in1 += types_per_step; \ in2 += types_per_step; \ - __m128i res = _mm_##op##_ep##type_sign##type_size(vecA, vecB); \ + __m128i res = _mm_##op##_ep##type_sign##type_size(vecA, vecB); \ _mm_storeu_si128((__m128i*)out, res); \ out += types_per_step; \ } \ } #else +#error Target architecture lacks SSE2 and SSE3 support needed for _mm_lddqu_si128 and _mm_storeu_si128 +#endif /* __SSE3__ && __SSE2__ */ +#else #define OP_AVX_SSE4_1_FUNC_3(name, type_sign, type_size, type, op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) */ @@ -667,12 +853,13 @@ static void OP_CONCAT(ompi_op_avx_3buff_##name##_##type,PREPEND)(const void * re } #if defined(GENERATE_AVX512_CODE) && defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) +#if __AVX512BW__ && __AVX__ #define OP_AVX_AVX512_MUL_3(name, type_sign, 
type_size, type, op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX512F_FLAG | OMPI_OP_AVX_HAS_AVX512BW_FLAG) ) { \ int types_per_step = (256 / 8) / sizeof(type); \ for (; left_over >= types_per_step; left_over -= types_per_step) { \ - __m256i vecA_tmp = _mm256_loadu_si256((__m256i*)in1); \ - __m256i vecB_tmp = _mm256_loadu_si256((__m256i*)in2); \ + __m256i vecA_tmp = _mm256_loadu_si256((__m256i*)in1); \ + __m256i vecB_tmp = _mm256_loadu_si256((__m256i*)in2); \ in1 += types_per_step; \ in2 += types_per_step; \ __m512i vecA = _mm512_cvtepi8_epi16(vecA_tmp); \ @@ -685,6 +872,9 @@ static void OP_CONCAT(ompi_op_avx_3buff_##name##_##type,PREPEND)(const void * re if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX512BW and AVX support needed for _mm256_loadu_si256, _mm256_storeu_si256 and _mm512_cvtepi8_epi16 +#endif /* __AVX512BW__ && __AVX__ */ +#else #define OP_AVX_AVX512_MUL_3(name, type_sign, type_size, type, op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) */ /** @@ -723,12 +913,13 @@ static void OP_CONCAT(ompi_op_avx_3buff_##name##_##type,PREPEND)(const void * re } #if defined(GENERATE_AVX512_CODE) && defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) +#if __AVX512F__ #define OP_AVX_AVX512_BIT_FUNC_3(name, type_size, type, op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX512F_FLAG) ) { \ types_per_step = (512 / 8) / sizeof(type); \ for (; left_over >= types_per_step; left_over -= types_per_step) { \ - __m512i vecA = _mm512_loadu_si512(in1); \ - __m512i vecB = _mm512_loadu_si512(in2); \ + __m512i vecA = _mm512_loadu_si512(in1); \ + __m512i vecB = _mm512_loadu_si512(in2); \ in1 += types_per_step; \ in2 += types_per_step; \ __m512i res = _mm512_##op##_si512(vecA, vecB); \ @@ -738,10 +929,14 @@ static void OP_CONCAT(ompi_op_avx_3buff_##name##_##type,PREPEND)(const void * re if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX512F support needed for _mm512_loadu_si512 and _mm512_storeu_si512 +#endif /* __AVX512F__ */ +#else #define OP_AVX_AVX512_BIT_FUNC_3(name, type_size, type, op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) */ #if defined(GENERATE_AVX2_CODE) && defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) +#if __AVX__ #define OP_AVX_AVX2_BIT_FUNC_3(name, type_size, type, op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX2_FLAG | OMPI_OP_AVX_HAS_AVX_FLAG) ) { \ types_per_step = (256 / 8) / sizeof(type); \ @@ -750,17 +945,21 @@ static void OP_CONCAT(ompi_op_avx_3buff_##name##_##type,PREPEND)(const void * re __m256i vecB = _mm256_loadu_si256((__m256i*)in2); \ in1 += types_per_step; \ in2 += types_per_step; \ - __m256i res = _mm256_##op##_si256(vecA, vecB); \ + __m256i res = _mm256_##op##_si256(vecA, vecB); \ _mm256_storeu_si256((__m256i*)out, res); \ out += types_per_step; \ } \ if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX support needed for _mm256_loadu_si256 and _mm256_storeu_si256 +#endif /* __AVX__ */ +#else #define OP_AVX_AVX2_BIT_FUNC_3(name, type_size, type, op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) */ #if defined(GENERATE_SSE3_CODE) && defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) +#if __SSE3__ && __SSE2__ #define OP_AVX_SSE3_BIT_FUNC_3(name, type_size, type, op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_SSE3_FLAG) ) { \ types_per_step = (128 / 8) / sizeof(type); \ @@ -769,12 +968,15 @@ static void 
OP_CONCAT(ompi_op_avx_3buff_##name##_##type,PREPEND)(const void * re __m128i vecB = _mm_lddqu_si128((__m128i*)in2); \ in1 += types_per_step; \ in2 += types_per_step; \ - __m128i res = _mm_##op##_si128(vecA, vecB); \ + __m128i res = _mm_##op##_si128(vecA, vecB); \ _mm_storeu_si128((__m128i*)out, res); \ out += types_per_step; \ } \ } #else +#error Target architecture lacks SSE2 and SSE3 support needed for _mm_lddqu_si128 and _mm_storeu_si128 +#endif /* __SSE3__ && __SSE2__ */ +#else #define OP_AVX_SSE3_BIT_FUNC_3(name, type_size, type, op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) */ @@ -809,12 +1011,13 @@ static void OP_CONCAT(ompi_op_avx_3buff_##op##_##type,PREPEND)(const void *_in1, } #if defined(GENERATE_AVX512_CODE) && defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) +#if __AVX512F__ #define OP_AVX_AVX512_FLOAT_FUNC_3(op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX512F_FLAG) ) { \ types_per_step = (512 / 8) / sizeof(float); \ for (; left_over >= types_per_step; left_over -= types_per_step) { \ - __m512 vecA = _mm512_loadu_ps(in1); \ - __m512 vecB = _mm512_loadu_ps(in2); \ + __m512 vecA = _mm512_loadu_ps(in1); \ + __m512 vecB = _mm512_loadu_ps(in2); \ in1 += types_per_step; \ in2 += types_per_step; \ __m512 res = _mm512_##op##_ps(vecA, vecB); \ @@ -824,16 +1027,20 @@ static void OP_CONCAT(ompi_op_avx_3buff_##op##_##type,PREPEND)(const void *_in1, if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX512F support needed for _mm512_loadu_ps and _mm512_storeu_ps +#endif /* __AVX512F__ */ +#else #define OP_AVX_AVX512_FLOAT_FUNC_3(op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) */ #if defined(GENERATE_AVX2_CODE) && defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) +#if __AVX__ #define OP_AVX_AVX_FLOAT_FUNC_3(op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX_FLAG) ) { \ types_per_step = (256 / 8) / sizeof(float); \ for( ; left_over >= types_per_step; left_over -= types_per_step ) { \ - __m256 vecA = _mm256_loadu_ps(in1); \ - __m256 vecB = _mm256_loadu_ps(in2); \ + __m256 vecA = _mm256_loadu_ps(in1); \ + __m256 vecB = _mm256_loadu_ps(in2); \ in1 += types_per_step; \ in2 += types_per_step; \ __m256 res = _mm256_##op##_ps(vecA, vecB); \ @@ -843,10 +1050,14 @@ static void OP_CONCAT(ompi_op_avx_3buff_##op##_##type,PREPEND)(const void *_in1, if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX support needed for _mm256_loadu_ps and _mm256_storeu_ps +#endif /* __AVX__ */ +#else #define OP_AVX_AVX_FLOAT_FUNC_3(op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) */ #if defined(GENERATE_AVX_CODE) && defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) +#if __SSE__ #define OP_AVX_SSE_FLOAT_FUNC_3(op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_SSE_FLAG) ) { \ types_per_step = (128 / 8) / sizeof(float); \ @@ -861,6 +1072,9 @@ static void OP_CONCAT(ompi_op_avx_3buff_##op##_##type,PREPEND)(const void *_in1, } \ } #else +#error Target architecture lacks SSE support needed for _mm_loadu_ps and _mm_storeu_ps +#endif /* __SSE__ */ +#else #define OP_AVX_SSE_FLOAT_FUNC_3(op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) */ @@ -895,12 +1109,13 @@ static void OP_CONCAT(ompi_op_avx_3buff_##op##_float,PREPEND)(const void *_in1, } #if defined(GENERATE_AVX512_CODE) && defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) +#if __AVX512F__ #define OP_AVX_AVX512_DOUBLE_FUNC_3(op) \ if( 
OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX512F_FLAG) ) { \ types_per_step = (512 / 8) / sizeof(double); \ for (; left_over >= types_per_step; left_over -= types_per_step) { \ - __m512d vecA = _mm512_loadu_pd((in1)); \ - __m512d vecB = _mm512_loadu_pd((in2)); \ + __m512d vecA = _mm512_loadu_pd((in1)); \ + __m512d vecB = _mm512_loadu_pd((in2)); \ in1 += types_per_step; \ in2 += types_per_step; \ __m512d res = _mm512_##op##_pd(vecA, vecB); \ @@ -910,16 +1125,20 @@ static void OP_CONCAT(ompi_op_avx_3buff_##op##_float,PREPEND)(const void *_in1, if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX512F support needed for _mm512_loadu_pd and _mm512_storeu_pd +#endif /* __AVX512F__ */ +#else #define OP_AVX_AVX512_DOUBLE_FUNC_3(op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX512) && (1 == OMPI_MCA_OP_HAVE_AVX512) */ #if defined(GENERATE_AVX2_CODE) && defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) +#if __AVX__ #define OP_AVX_AVX_DOUBLE_FUNC_3(op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_AVX_FLAG) ) { \ types_per_step = (256 / 8) / sizeof(double); \ for( ; left_over >= types_per_step; left_over -= types_per_step ) { \ - __m256d vecA = _mm256_loadu_pd(in1); \ - __m256d vecB = _mm256_loadu_pd(in2); \ + __m256d vecA = _mm256_loadu_pd(in1); \ + __m256d vecB = _mm256_loadu_pd(in2); \ in1 += types_per_step; \ in2 += types_per_step; \ __m256d res = _mm256_##op##_pd(vecA, vecB); \ @@ -929,10 +1148,14 @@ static void OP_CONCAT(ompi_op_avx_3buff_##op##_float,PREPEND)(const void *_in1, if( 0 == left_over ) return; \ } #else +#error Target architecture lacks AVX support needed for _mm256_loadu_pd and _mm256_storeu_pd +#endif /* __AVX__ */ +#else #define OP_AVX_AVX_DOUBLE_FUNC_3(op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX2) && (1 == OMPI_MCA_OP_HAVE_AVX2) */ #if defined(GENERATE_AVX_CODE) && defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) +#if __SSE2__ #define OP_AVX_SSE2_DOUBLE_FUNC_3(op) \ if( OMPI_OP_AVX_HAS_FLAGS(OMPI_OP_AVX_HAS_SSE2_FLAG) ) { \ types_per_step = (128 / 8) / sizeof(double); \ @@ -947,6 +1170,9 @@ static void OP_CONCAT(ompi_op_avx_3buff_##op##_float,PREPEND)(const void *_in1, } \ } #else +#error Target architecture lacks SSE2 support needed for _mm_loadu_pd and _mm_storeu_pd +#endif /* __SSE2__ */ +#else #define OP_AVX_SSE2_DOUBLE_FUNC_3(op) {} #endif /* defined(OMPI_MCA_OP_HAVE_AVX) && (1 == OMPI_MCA_OP_HAVE_AVX) */ diff --git a/ompi/mca/osc/rdma/osc_rdma_frag.h b/ompi/mca/osc/rdma/osc_rdma_frag.h index f174f48e7ff..fb271840fce 100644 --- a/ompi/mca/osc/rdma/osc_rdma_frag.h +++ b/ompi/mca/osc/rdma/osc_rdma_frag.h @@ -3,6 +3,7 @@ * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2020 IBM Corporation. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -50,6 +51,7 @@ static inline int ompi_osc_rdma_frag_alloc (ompi_osc_rdma_module_t *module, size if (NULL == curr) { opal_free_list_item_t *item = NULL; + void *_tmp_ptr = NULL; item = opal_free_list_get (&mca_osc_rdma_component.frags); if (OPAL_UNLIKELY(NULL == item)) { @@ -72,7 +74,7 @@ static inline int ompi_osc_rdma_frag_alloc (ompi_osc_rdma_module_t *module, size } } - if (!opal_atomic_compare_exchange_strong_ptr ((opal_atomic_intptr_t *) &module->rdma_frag, &(intptr_t){0}, (intptr_t) curr)) { + if (!opal_atomic_compare_exchange_strong_ptr ((opal_atomic_intptr_t *) &module->rdma_frag, (intptr_t *) &_tmp_ptr, (intptr_t) curr)) { ompi_osc_rdma_deregister (module, curr->handle); curr->handle = NULL; diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index 1e5c19d4aca..b4178a81b02 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -521,7 +521,7 @@ void mca_pml_ob1_recv_frag_callback_rndv (mca_btl_base_module_t *btl, if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) { return; } - ob1_hdr_ntoh(hdr, MCA_PML_OB1_HDR_TYPE_RNDV); + ob1_hdr_ntoh((mca_pml_ob1_hdr_t*)hdr, MCA_PML_OB1_HDR_TYPE_RNDV); mca_pml_ob1_recv_frag_match(btl, &hdr->hdr_match, segments, descriptor->des_segment_count, MCA_PML_OB1_HDR_TYPE_RNDV); } @@ -535,7 +535,7 @@ void mca_pml_ob1_recv_frag_callback_rget (mca_btl_base_module_t *btl, if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) { return; } - ob1_hdr_ntoh(hdr, MCA_PML_OB1_HDR_TYPE_RGET); + ob1_hdr_ntoh((mca_pml_ob1_hdr_t*)hdr, MCA_PML_OB1_HDR_TYPE_RGET); mca_pml_ob1_recv_frag_match(btl, &hdr->hdr_match, segments, descriptor->des_segment_count, MCA_PML_OB1_HDR_TYPE_RGET); } @@ -552,7 +552,7 @@ void mca_pml_ob1_recv_frag_callback_ack (mca_btl_base_module_t *btl, return; } - ob1_hdr_ntoh(hdr, MCA_PML_OB1_HDR_TYPE_ACK); + ob1_hdr_ntoh((mca_pml_ob1_hdr_t*)hdr, MCA_PML_OB1_HDR_TYPE_ACK); sendreq = (mca_pml_ob1_send_request_t *) hdr->hdr_ack.hdr_src_req.pval; sendreq->req_recv = hdr->hdr_ack.hdr_dst_req; @@ -614,7 +614,7 @@ void mca_pml_ob1_recv_frag_callback_frag (mca_btl_base_module_t *btl, return; } - ob1_hdr_ntoh(hdr, MCA_PML_OB1_HDR_TYPE_FRAG); + ob1_hdr_ntoh((mca_pml_ob1_hdr_t*)hdr, MCA_PML_OB1_HDR_TYPE_FRAG); recvreq = (mca_pml_ob1_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval; #if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */ /* If data is destined for GPU buffer and convertor was set up for asynchronous @@ -648,7 +648,7 @@ void mca_pml_ob1_recv_frag_callback_put (mca_btl_base_module_t *btl, return; } - ob1_hdr_ntoh (hdr, MCA_PML_OB1_HDR_TYPE_PUT); + ob1_hdr_ntoh ((mca_pml_ob1_hdr_t*)hdr, MCA_PML_OB1_HDR_TYPE_PUT); sendreq = (mca_pml_ob1_send_request_t *) hdr->hdr_rdma.hdr_req.pval; mca_pml_ob1_send_request_put (sendreq, btl, &hdr->hdr_rdma); } diff --git a/ompi/mpi/man/man3/MPI_Init_thread.3in b/ompi/mpi/man/man3/MPI_Init_thread.3in index c6202bfa0a2..800e1cb381c 100644 --- a/ompi/mpi/man/man3/MPI_Init_thread.3in +++ b/ompi/mpi/man/man3/MPI_Init_thread.3in @@ -1,7 +1,7 @@ .\" -*- nroff -*- .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation -.\" Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved. .\" Copyright (c) 2020 Google, LLC. All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_Init_thread 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" @@ -128,52 +128,27 @@ opening files, reading standard input, or writing to standard output. . .SH MPI_THREAD_MULTIPLE Support . -MPI_THREAD_MULTIPLE support is included if Open MPI was configured -with the --enable-mpi-thread-multiple configure switch. You can check the +MPI_THREAD_MULTIPLE support is included if the environment in which +Open MPI was built supports threading. You can check the output of .BR ompi_info (1) to see if Open MPI has MPI_THREAD_MULTIPLE support: . .PP .nf -shell$ ompi_info | grep -i thread - Thread support: posix (mpi: yes, progress: no) +shell$ ompi_info | grep "Thread support" + Thread support: posix (MPI_THREAD_MULTIPLE: yes, OPAL support: yes, OMPI progress: no, Event lib: yes) shell$ .fi . .PP -The "mpi: yes" portion of the above output indicates that Open MPI was -compiled with MPI_THREAD_MULTIPLE support. +The "MPI_THREAD_MULTIPLE: yes" portion of the above output indicates +that Open MPI was compiled with MPI_THREAD_MULTIPLE support. . .PP -Note that MPI_THREAD_MULTIPLE support is only lightly tested. It -likely does not work for thread-intensive applications. Also note -that -.I only -the MPI point-to-point communication functions for the BTL's listed -below are considered thread safe. Other support functions (e.g., MPI -attributes) have not been certified as safe when simultaneously used -by multiple threads. -. -.PP -.nf - tcp - sm - mx - elan - self -.fi -. -.PP -Note that Open MPI's thread support is in a fairly early stage; the -above devices are likely to -.IR work , -but the latency is likely to be fairly high. Specifically, efforts so -far have concentrated on -.IR correctness , -not -.I performance -(yet). +Note that there is a small performance penalty for using +MPI_THREAD_MULTIPLE support; latencies for short messages will be +higher as compared to when using MPI_THREAD_SINGLE, for example. . . .SH ERRORS diff --git a/ompi/op/op.c b/ompi/op/op.c index cbfcc461c42..39732a40187 100644 --- a/ompi/op/op.c +++ b/ompi/op/op.c @@ -383,22 +383,6 @@ ompi_op_t *ompi_op_create_user(bool commute, } -/* - * See lengthy comment in mpi/cxx/intercepts.cc for how the C++ MPI::Op - * callbacks work. - */ -void ompi_op_set_cxx_callback(ompi_op_t *op, MPI_User_function *fn) -{ - op->o_flags |= OMPI_OP_FLAGS_CXX_FUNC; - /* The OMPI C++ intercept was previously stored in - op->o_func.fort_fn by ompi_op_create_user(). So save that in - cxx.intercept_fn and put the user's fn in cxx.user_fn. */ - op->o_func.cxx_data.intercept_fn = - (ompi_op_cxx_handler_fn_t *) op->o_func.fort_fn; - op->o_func.cxx_data.user_fn = fn; -} - - /* * See lengthy comment in mpi/cxx/intercepts.cc for how the C++ MPI::Op * callbacks work. diff --git a/ompi/op/op.h b/ompi/op/op.h index 742b7461f1e..2cddcc97bb2 100644 --- a/ompi/op/op.h +++ b/ompi/op/op.h @@ -67,25 +67,9 @@ typedef void (ompi_op_c_handler_fn_t)(void *, void *, int *, typedef void (ompi_op_fortran_handler_fn_t)(void *, void *, MPI_Fint *, MPI_Fint *); -/** - * Typedef for C++ op functions intercept (used for user-defined - * MPI::Ops). - * - * See the lengthy explanation for why this is different than the C - * intercept in ompi/mpi/cxx/intercepts.cc in the - * ompi_mpi_cxx_op_intercept() function. - */ -typedef void (ompi_op_cxx_handler_fn_t)(void *, void *, int *, - struct ompi_datatype_t **, - MPI_User_function * op); - /** * Typedef for Java op functions intercept (used for user-defined * MPI.Ops). 
- * - * See the lengthy explanation for why this is different than the C - * intercept in ompi/mpi/cxx/intercepts.cc in the - * ompi_mpi_cxx_op_intercept() function. */ typedef void (ompi_op_java_handler_fn_t)(void *, void *, int *, struct ompi_datatype_t **, @@ -99,8 +83,6 @@ typedef void (ompi_op_java_handler_fn_t)(void *, void *, int *, #define OMPI_OP_FLAGS_INTRINSIC 0x0001 /** Set if the callback function is in Fortran */ #define OMPI_OP_FLAGS_FORTRAN_FUNC 0x0002 -/** Set if the callback function is in C++ */ -#define OMPI_OP_FLAGS_CXX_FUNC 0x0004 /** Set if the callback function is in Java */ #define OMPI_OP_FLAGS_JAVA_FUNC 0x0008 /** Set if the callback function is associative (MAX and SUM will both @@ -171,15 +153,7 @@ struct ompi_op_t { ompi_op_c_handler_fn_t *c_fn; /** Fortran handler function pointer */ ompi_op_fortran_handler_fn_t *fort_fn; - /** C++ intercept function data -- see lengthy comment in - ompi/mpi/cxx/intercepts.cc::ompi_mpi_cxx_op_intercept() for - an explanation */ - struct { - /* The user's function (it's the wrong type, but that's ok) */ - ompi_op_c_handler_fn_t *user_fn; - /* The OMPI C++ callback/intercept function */ - ompi_op_cxx_handler_fn_t *intercept_fn; - } cxx_data; + /** Java intercept function data */ struct { /* The OMPI C++ callback/intercept function */ ompi_op_java_handler_fn_t *intercept_fn; @@ -393,17 +367,8 @@ ompi_op_t *ompi_op_create_user(bool commute, ompi_op_fortran_handler_fn_t func); /** - * Mark an MPI_Op as holding a C++ callback function, and cache - * that function in the MPI_Op. See a lenghty comment in - * ompi/mpi/cxx/op.c::ompi_mpi_cxx_op_intercept() for a full - * expalantion. - */ -OMPI_DECLSPEC void ompi_op_set_cxx_callback(ompi_op_t * op, - MPI_User_function * fn); - -/** - * Similar to ompi_op_set_cxx_callback(), mark an MPI_Op as holding a - * Java calback function, and cache that function in the MPI_Op. + * Mark an MPI_Op as holding a Java callback function, and cache that + * function in the MPI_Op.
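For context on the dispatch machinery this hunk trims (the C++ intercept path is removed; the C, Fortran, and Java paths remain), the sketch below shows the standard way a user-defined reduction enters that machinery from application code. It is a minimal, self-contained example using only the documented MPI C interface and is not part of this patch.

/* user_op.c - minimal sketch of a user-defined MPI_Op (element-wise int sum). */
#include <stdio.h>
#include <mpi.h>

/* MPI_User_function signature: (invec, inoutvec, len, datatype). */
static void int_sum(void *invec, void *inoutvec, int *len, MPI_Datatype *dtype)
{
    int *in = (int *) invec;
    int *inout = (int *) inoutvec;
    (void) dtype;                 /* this example only handles MPI_INT */
    for (int i = 0; i < *len; ++i) {
        inout[i] += in[i];
    }
}

int main(int argc, char **argv)
{
    int rank, in, out = 0;
    MPI_Op op;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Register the callback; commute = 1 lets the library reorder operands. */
    MPI_Op_create(int_sum, 1, &op);

    in = rank + 1;
    MPI_Reduce(&in, &out, 1, MPI_INT, op, 0, MPI_COMM_WORLD);
    if (0 == rank) {
        printf("sum = %d\n", out);
    }

    MPI_Op_free(&op);
    MPI_Finalize();
    return 0;
}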
*/ OMPI_DECLSPEC void ompi_op_set_java_callback(ompi_op_t *op, void *jnienv, void *object, int baseType); @@ -594,10 +559,6 @@ static inline void ompi_op_reduce(ompi_op_t * op, void *source, f_count = OMPI_INT_2_FINT(count); op->o_func.fort_fn(source, target, &f_count, &f_dtype); return; - } else if (0 != (op->o_flags & OMPI_OP_FLAGS_CXX_FUNC)) { - op->o_func.cxx_data.intercept_fn(source, target, &count, &dtype, - op->o_func.cxx_data.user_fn); - return; } else if (0 != (op->o_flags & OMPI_OP_FLAGS_JAVA_FUNC)) { op->o_func.java_data.intercept_fn(source, target, &count, &dtype, op->o_func.java_data.baseType, diff --git a/ompi/request/grequest.c b/ompi/request/grequest.c index c895b4232b6..02affd642aa 100644 --- a/ompi/request/grequest.c +++ b/ompi/request/grequest.c @@ -163,6 +163,7 @@ int ompi_grequest_start( greq->greq_free.c_free = gfree_fn; greq->greq_cancel.c_cancel = gcancel_fn; greq->greq_base.req_status = ompi_status_empty; + greq->greq_base.req_complete = REQUEST_PENDING; *request = &greq->greq_base; return OMPI_SUCCESS; diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c index ad8ed8ac91e..5d5ab6f03e4 100644 --- a/ompi/runtime/ompi_mpi_params.c +++ b/ompi/runtime/ompi_mpi_params.c @@ -334,9 +334,10 @@ int ompi_mpi_register_params(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_mpi_compat_mpi3); +#if SPC_ENABLE == 1 ompi_mpi_spc_attach_string = NULL; (void) mca_base_var_register("ompi", "mpi", NULL, "spc_attach", - "A comma delimeted string listing the software-based performance counters (SPCs) to enable.", + "A comma-delimited list of software-based performance counters (SPCs) to enable (\"all\" enables all counters).", MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_READONLY, @@ -344,11 +345,12 @@ int ompi_mpi_register_params(void) ompi_mpi_spc_dump_enabled = false; (void) mca_base_var_register("ompi", "mpi", NULL, "spc_dump_enabled", - "A boolean value for whether (true) or not (false) to enable dumping SPC counters in MPI_Finalize.", + "A boolean value for whether (true) or not (false) to enable dumping enabled SPC counters in MPI_Finalize.", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_READONLY, &ompi_mpi_spc_dump_enabled); +#endif // SPC_ENABLE return OMPI_SUCCESS; } diff --git a/ompi/runtime/ompi_rte.c b/ompi/runtime/ompi_rte.c index 480c2252460..0c0d4ee07ed 100644 --- a/ompi/runtime/ompi_rte.c +++ b/ompi/runtime/ompi_rte.c @@ -14,6 +14,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights * reserved. + * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ */ #include "ompi_config.h" @@ -996,7 +997,7 @@ void ompi_rte_abort_peers(opal_process_name_t *procs, } static size_t handler = SIZE_MAX; -static bool debugger_event_active = true; +static volatile bool debugger_event_active = true; static void _release_fn(size_t refid, pmix_status_t status, const pmix_proc_t *source, @@ -1014,14 +1015,14 @@ static void _release_fn(size_t refid, pmix_status_t status, /* * Wait for a debugger if asked. We support two ways of waiting for - * attaching debuggers -- see big comment in - * pmix/tools/pmixrun/debuggers.c explaining the two scenarios.
+ * attaching debuggers */ void ompi_rte_wait_for_debugger(void) { pmix_info_t directive; char *evar; int time, code = PMIX_ERR_DEBUGGER_RELEASE; + pmix_info_t info; /* check PMIx to see if we are under a debugger */ if (NULL == getenv("PMIX_DEBUG_WAIT_FOR_NOTIFY") && @@ -1046,6 +1047,13 @@ void ompi_rte_wait_for_debugger(void) PMIx_Register_event_handler(&code, 1, &directive, 1, _release_fn, NULL, NULL); PMIX_INFO_DESTRUCT(&directive); + /* notify the host that we are waiting */ + PMIX_INFO_LOAD(&info, PMIX_EVENT_NON_DEFAULT, NULL, PMIX_BOOL); + PMIx_Notify_event(PMIX_DEBUG_WAITING_FOR_NOTIFY, + &opal_process_info.myprocid, + PMIX_RANGE_RM, &info, 1, NULL, NULL); + PMIX_INFO_DESTRUCT(&info); + /* let the MPI progress engine run while we wait for debugger release */ OMPI_WAIT_FOR_COMPLETION(debugger_event_active); diff --git a/ompi/runtime/ompi_spc.c b/ompi/runtime/ompi_spc.c index 099934c658c..4c0ed5a1b5e 100644 --- a/ompi/runtime/ompi_spc.c +++ b/ompi/runtime/ompi_spc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 The University of Tennessee and The University + * Copyright (c) 2018-2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * @@ -16,11 +16,30 @@ * $HEADER$ */ -#include "ompi_spc.h" +#include "ompi_config.h" + +#include +#include +#include +#include + +#include "ompi/runtime/ompi_spc.h" +#include "ompi/runtime/params.h" + +#include "ompi/communicator/communicator.h" +#include "ompi/datatype/ompi_datatype.h" +#include "opal/mca/timer/timer.h" +#include "opal/mca/base/mca_base_pvar.h" +#include "opal/util/argv.h" +#include "opal/util/show_help.h" +#include "opal/util/output.h" + +#if SPC_ENABLE == 1 static opal_timer_t sys_clock_freq_mhz = 0; static void ompi_spc_dump(void); +static ompi_spc_value_t ompi_spc_cycles_to_usecs_internal(opal_timer_t cycles); /* Array for converting from SPC indices to MPI_T indices */ static bool mpi_t_enabled = false; @@ -29,154 +48,133 @@ static ompi_communicator_t *ompi_spc_comm = NULL; typedef struct ompi_spc_event_t { const char* counter_name; const char* counter_description; + bool is_high_watermark; + bool is_timer_event; } ompi_spc_event_t; -#define SET_COUNTER_ARRAY(NAME, DESC) [NAME] = { .counter_name = #NAME, .counter_description = DESC } - -static ompi_spc_event_t ompi_spc_events_names[OMPI_SPC_NUM_COUNTERS] = { - SET_COUNTER_ARRAY(OMPI_SPC_SEND, "The number of times MPI_Send was called."), - SET_COUNTER_ARRAY(OMPI_SPC_BSEND, "The number of times MPI_Bsend was called."), - SET_COUNTER_ARRAY(OMPI_SPC_RSEND, "The number of times MPI_Rsend was called."), - SET_COUNTER_ARRAY(OMPI_SPC_SSEND, "The number of times MPI_Ssend was called."), - SET_COUNTER_ARRAY(OMPI_SPC_RECV, "The number of times MPI_Recv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_MRECV, "The number of times MPI_Mrecv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ISEND, "The number of times MPI_Isend was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IBSEND, "The number of times MPI_Ibsend was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IRSEND, "The number of times MPI_Irsend was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ISSEND, "The number of times MPI_Issend was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IRECV, "The number of times MPI_Irecv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_SENDRECV, "The number of times MPI_Sendrecv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_SENDRECV_REPLACE, "The number of times MPI_Sendrecv_replace was called."), - SET_COUNTER_ARRAY(OMPI_SPC_PUT, "The number of times MPI_Put was called."), - 
SET_COUNTER_ARRAY(OMPI_SPC_RPUT, "The number of times MPI_Rput was called."), - SET_COUNTER_ARRAY(OMPI_SPC_GET, "The number of times MPI_Get was called."), - SET_COUNTER_ARRAY(OMPI_SPC_RGET, "The number of times MPI_Rget was called."), - SET_COUNTER_ARRAY(OMPI_SPC_PROBE, "The number of times MPI_Probe was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IPROBE, "The number of times MPI_Iprobe was called."), - SET_COUNTER_ARRAY(OMPI_SPC_BCAST, "The number of times MPI_Bcast was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IBCAST, "The number of times MPI_Ibcast was called."), - SET_COUNTER_ARRAY(OMPI_SPC_BCAST_INIT, "The number of times MPIX_Bcast_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_REDUCE, "The number of times MPI_Reduce was called."), - SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER, "The number of times MPI_Reduce_scatter was called."), - SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER_BLOCK, "The number of times MPI_Reduce_scatter_block was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IREDUCE, "The number of times MPI_Ireduce was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IREDUCE_SCATTER, "The number of times MPI_Ireduce_scatter was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IREDUCE_SCATTER_BLOCK, "The number of times MPI_Ireduce_scatter_block was called."), - SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_INIT, "The number of times MPIX_Reduce_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER_INIT, "The number of times MPIX_Reduce_scatter_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER_BLOCK_INIT, "The number of times MPIX_Reduce_scatter_block_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ALLREDUCE, "The number of times MPI_Allreduce was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IALLREDUCE, "The number of times MPI_Iallreduce was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ALLREDUCE_INIT, "The number of times MPIX_Allreduce_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_SCAN, "The number of times MPI_Scan was called."), - SET_COUNTER_ARRAY(OMPI_SPC_EXSCAN, "The number of times MPI_Exscan was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ISCAN, "The number of times MPI_Iscan was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IEXSCAN, "The number of times MPI_Iexscan was called."), - SET_COUNTER_ARRAY(OMPI_SPC_SCAN_INIT, "The number of times MPIX_Scan_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_EXSCAN_INIT, "The number of times MPIX_Exscan_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_SCATTER, "The number of times MPI_Scatter was called."), - SET_COUNTER_ARRAY(OMPI_SPC_SCATTERV, "The number of times MPI_Scatterv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ISCATTER, "The number of times MPI_Iscatter was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ISCATTERV, "The number of times MPI_Iscatterv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_SCATTER_INIT, "The number of times MPIX_Scatter_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_SCATTERV_INIT, "The number of times MPIX_Scatterv_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_GATHER, "The number of times MPI_Gather was called."), - SET_COUNTER_ARRAY(OMPI_SPC_GATHERV, "The number of times MPI_Gatherv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IGATHER, "The number of times MPI_Igather was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IGATHERV, "The number of times MPI_Igatherv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_GATHER_INIT, "The number of times MPIX_Gather_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_GATHERV_INIT, "The number of times MPIX_Gatherv_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALL, "The number of times MPI_Alltoall was 
called."), - SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLV, "The number of times MPI_Alltoallv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLW, "The number of times MPI_Alltoallw was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IALLTOALL, "The number of times MPI_Ialltoall was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IALLTOALLV, "The number of times MPI_Ialltoallv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IALLTOALLW, "The number of times MPI_Ialltoallw was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALL_INIT, "The number of times MPIX_Alltoall_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLV_INIT, "The number of times MPIX_Alltoallv_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLW_INIT, "The number of times MPIX_Alltoallw_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALL, "The number of times MPI_Neighbor_alltoall was called."), - SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLV, "The number of times MPI_Neighbor_alltoallv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLW, "The number of times MPI_Neighbor_alltoallw was called."), - SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLTOALL, "The number of times MPI_Ineighbor_alltoall was called."), - SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLTOALLV, "The number of times MPI_Ineighbor_alltoallv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLTOALLW, "The number of times MPI_Ineighbor_alltoallw was called."), - SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALL_INIT, "The number of times MPIX_Neighbor_alltoall_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLV_INIT, "The number of times MPIX_Neighbor_alltoallv_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLW_INIT, "The number of times MPIX_Neighbor_alltoallw_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHER, "The number of times MPI_Allgather was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHERV, "The number of times MPI_Allgatherv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IALLGATHER, "The number of times MPI_Iallgather was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IALLGATHERV, "The number of times MPI_Iallgatherv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHER_INIT, "The number of times MPIX_Allgather_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHERV_INIT, "The number of times MPIX_Allgatherv_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHER, "The number of times MPI_Neighbor_allgather was called."), - SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHERV, "The number of times MPI_Neighbor_allgatherv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLGATHER, "The number of times MPI_Ineighbor_allgather was called."), - SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLGATHERV, "The number of times MPI_Ineighbor_allgatherv was called."), - SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHER_INIT, "The number of times MPIX_Neighbor_allgather_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHERV_INIT, "The number of times MPIX_Neighbor_allgatherv_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_TEST, "The number of times MPI_Test was called."), - SET_COUNTER_ARRAY(OMPI_SPC_TESTALL, "The number of times MPI_Testall was called."), - SET_COUNTER_ARRAY(OMPI_SPC_TESTANY, "The number of times MPI_Testany was called."), - SET_COUNTER_ARRAY(OMPI_SPC_TESTSOME, "The number of times MPI_Testsome was called."), - SET_COUNTER_ARRAY(OMPI_SPC_WAIT, "The number of times MPI_Wait was called."), - SET_COUNTER_ARRAY(OMPI_SPC_WAITALL, "The number of times MPI_Waitall was called."), - 
SET_COUNTER_ARRAY(OMPI_SPC_WAITANY, "The number of times MPI_Waitany was called."), - SET_COUNTER_ARRAY(OMPI_SPC_WAITSOME, "The number of times MPI_Waitsome was called."), - SET_COUNTER_ARRAY(OMPI_SPC_BARRIER, "The number of times MPI_Barrier was called."), - SET_COUNTER_ARRAY(OMPI_SPC_IBARRIER, "The number of times MPI_Ibarrier was called."), - SET_COUNTER_ARRAY(OMPI_SPC_BARRIER_INIT, "The number of times MPIX_Barrier_init was called."), - SET_COUNTER_ARRAY(OMPI_SPC_WTIME, "The number of times MPI_Wtime was called."), - SET_COUNTER_ARRAY(OMPI_SPC_CANCEL, "The number of times MPI_Cancel was called."), - SET_COUNTER_ARRAY(OMPI_SPC_BYTES_RECEIVED_USER, "The number of bytes received by the user through point-to-point communications. Note: Includes bytes transferred using internal RMA operations."), - SET_COUNTER_ARRAY(OMPI_SPC_BYTES_RECEIVED_MPI, "The number of bytes received by MPI through collective, control, or other internal communications."), - SET_COUNTER_ARRAY(OMPI_SPC_BYTES_SENT_USER, "The number of bytes sent by the user through point-to-point communications. Note: Includes bytes transferred using internal RMA operations."), - SET_COUNTER_ARRAY(OMPI_SPC_BYTES_SENT_MPI, "The number of bytes sent by MPI through collective, control, or other internal communications."), - SET_COUNTER_ARRAY(OMPI_SPC_BYTES_PUT, "The number of bytes sent/received using RMA Put operations both through user-level Put functions and internal Put functions."), - SET_COUNTER_ARRAY(OMPI_SPC_BYTES_GET, "The number of bytes sent/received using RMA Get operations both through user-level Get functions and internal Get functions."), - SET_COUNTER_ARRAY(OMPI_SPC_UNEXPECTED, "The number of messages that arrived as unexpected messages."), - SET_COUNTER_ARRAY(OMPI_SPC_OUT_OF_SEQUENCE, "The number of messages that arrived out of the proper sequence."), - SET_COUNTER_ARRAY(OMPI_SPC_MATCH_TIME, "The number of microseconds spent matching unexpected messages. Note: The timer used on the back end is in cycles, which could potentially be problematic on a system where the clock frequency can change. 
On such a system, this counter could be inaccurate since we assume a fixed clock rate."), - SET_COUNTER_ARRAY(OMPI_SPC_UNEXPECTED_IN_QUEUE, "The number of messages that are currently in the unexpected message queue(s) of an MPI process."), - SET_COUNTER_ARRAY(OMPI_SPC_OOS_IN_QUEUE, "The number of messages that are currently in the out of sequence message queue(s) of an MPI process."), +#define SET_COUNTER_ARRAY(NAME, DESC, HWM, ITE) [NAME] = { .counter_name = #NAME, .counter_description = DESC, \ + .is_high_watermark = HWM, .is_timer_event = ITE } + +static const ompi_spc_event_t ompi_spc_events_desc[OMPI_SPC_NUM_COUNTERS] = { + SET_COUNTER_ARRAY(OMPI_SPC_SEND, "The number of times MPI_Send was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_BSEND, "The number of times MPI_Bsend was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_RSEND, "The number of times MPI_Rsend was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_SSEND, "The number of times MPI_Ssend was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_RECV, "The number of times MPI_Recv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_MRECV, "The number of times MPI_Mrecv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ISEND, "The number of times MPI_Isend was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IBSEND, "The number of times MPI_Ibsend was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IRSEND, "The number of times MPI_Irsend was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ISSEND, "The number of times MPI_Issend was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IRECV, "The number of times MPI_Irecv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_SENDRECV, "The number of times MPI_Sendrecv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_SENDRECV_REPLACE, "The number of times MPI_Sendrecv_replace was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_PUT, "The number of times MPI_Put was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_RPUT, "The number of times MPI_Rput was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_GET, "The number of times MPI_Get was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_RGET, "The number of times MPI_Rget was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_PROBE, "The number of times MPI_Probe was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IPROBE, "The number of times MPI_Iprobe was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_BCAST, "The number of times MPI_Bcast was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IBCAST, "The number of times MPI_Ibcast was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_BCAST_INIT, "The number of times MPIX_Bcast_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_REDUCE, "The number of times MPI_Reduce was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER, "The number of times MPI_Reduce_scatter was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER_BLOCK, "The number of times MPI_Reduce_scatter_block was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IREDUCE, "The number of times MPI_Ireduce was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IREDUCE_SCATTER, "The number of times MPI_Ireduce_scatter was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IREDUCE_SCATTER_BLOCK, "The number of times MPI_Ireduce_scatter_block was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_INIT, "The number of times MPIX_Reduce_init was 
called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER_INIT, "The number of times MPIX_Reduce_scatter_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_REDUCE_SCATTER_BLOCK_INIT, "The number of times MPIX_Reduce_scatter_block_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ALLREDUCE, "The number of times MPI_Allreduce was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IALLREDUCE, "The number of times MPI_Iallreduce was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ALLREDUCE_INIT, "The number of times MPIX_Allreduce_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_SCAN, "The number of times MPI_Scan was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_EXSCAN, "The number of times MPI_Exscan was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ISCAN, "The number of times MPI_Iscan was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IEXSCAN, "The number of times MPI_Iexscan was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_SCAN_INIT, "The number of times MPIX_Scan_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_EXSCAN_INIT, "The number of times MPIX_Exscan_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_SCATTER, "The number of times MPI_Scatter was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_SCATTERV, "The number of times MPI_Scatterv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ISCATTER, "The number of times MPI_Iscatter was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ISCATTERV, "The number of times MPI_Iscatterv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_SCATTER_INIT, "The number of times MPIX_Scatter_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_SCATTERV_INIT, "The number of times MPIX_Scatterv_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_GATHER, "The number of times MPI_Gather was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_GATHERV, "The number of times MPI_Gatherv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IGATHER, "The number of times MPI_Igather was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IGATHERV, "The number of times MPI_Igatherv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_GATHER_INIT, "The number of times MPIX_Gather_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_GATHERV_INIT, "The number of times MPIX_Gatherv_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALL, "The number of times MPI_Alltoall was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLV, "The number of times MPI_Alltoallv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLW, "The number of times MPI_Alltoallw was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IALLTOALL, "The number of times MPI_Ialltoall was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IALLTOALLV, "The number of times MPI_Ialltoallv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IALLTOALLW, "The number of times MPI_Ialltoallw was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALL_INIT, "The number of times MPIX_Alltoall_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLV_INIT, "The number of times MPIX_Alltoallv_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ALLTOALLW_INIT, "The number of times MPIX_Alltoallw_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALL, "The number of times MPI_Neighbor_alltoall was called.", false, false), 
+ SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLV, "The number of times MPI_Neighbor_alltoallv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLW, "The number of times MPI_Neighbor_alltoallw was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLTOALL, "The number of times MPI_Ineighbor_alltoall was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLTOALLV, "The number of times MPI_Ineighbor_alltoallv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLTOALLW, "The number of times MPI_Ineighbor_alltoallw was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALL_INIT, "The number of times MPIX_Neighbor_alltoall_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLV_INIT, "The number of times MPIX_Neighbor_alltoallv_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLTOALLW_INIT, "The number of times MPIX_Neighbor_alltoallw_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHER, "The number of times MPI_Allgather was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHERV, "The number of times MPI_Allgatherv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IALLGATHER, "The number of times MPI_Iallgather was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IALLGATHERV, "The number of times MPI_Iallgatherv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHER_INIT, "The number of times MPIX_Allgather_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_ALLGATHERV_INIT, "The number of times MPIX_Allgatherv_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHER, "The number of times MPI_Neighbor_allgather was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHERV, "The number of times MPI_Neighbor_allgatherv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLGATHER, "The number of times MPI_Ineighbor_allgather was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_INEIGHBOR_ALLGATHERV, "The number of times MPI_Ineighbor_allgatherv was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHER_INIT, "The number of times MPIX_Neighbor_allgather_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_NEIGHBOR_ALLGATHERV_INIT, "The number of times MPIX_Neighbor_allgatherv_init was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_TEST, "The number of times MPI_Test was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_TESTALL, "The number of times MPI_Testall was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_TESTANY, "The number of times MPI_Testany was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_TESTSOME, "The number of times MPI_Testsome was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_WAIT, "The number of times MPI_Wait was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_WAITALL, "The number of times MPI_Waitall was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_WAITANY, "The number of times MPI_Waitany was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_WAITSOME, "The number of times MPI_Waitsome was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_BARRIER, "The number of times MPI_Barrier was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_IBARRIER, "The number of times MPI_Ibarrier was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_BARRIER_INIT, "The number of times MPIX_Barrier_init was called.", false, false), + 
SET_COUNTER_ARRAY(OMPI_SPC_WTIME, "The number of times MPI_Wtime was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_CANCEL, "The number of times MPI_Cancel was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_BYTES_RECEIVED_USER, "The number of bytes received by the user through point-to-point communications. Note: Includes bytes transferred using internal RMA operations.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_BYTES_RECEIVED_MPI, "The number of bytes received by MPI through collective, control, or other internal communications.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_BYTES_SENT_USER, "The number of bytes sent by the user through point-to-point communications. Note: Includes bytes transferred using internal RMA operations.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_BYTES_SENT_MPI, "The number of bytes sent by MPI through collective, control, or other internal communications.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_BYTES_PUT, "The number of bytes sent/received using RMA Put operations both through user-level Put functions and internal Put functions.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_BYTES_GET, "The number of bytes sent/received using RMA Get operations both through user-level Get functions and internal Get functions.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_UNEXPECTED, "The number of messages that arrived as unexpected messages.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_OUT_OF_SEQUENCE, "The number of messages that arrived out of the proper sequence.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_MATCH_TIME, "The number of microseconds spent matching unexpected messages. Note: The timer used on the back end is in cycles, which could potentially be problematic on a system where the clock frequency can change. On such a system, this counter could be inaccurate since we assume a fixed clock rate.", false, true), + SET_COUNTER_ARRAY(OMPI_SPC_UNEXPECTED_IN_QUEUE, "The number of messages that are currently in the unexpected message queue(s) of an MPI process.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_OOS_IN_QUEUE, "The number of messages that are currently in the out of sequence message queue(s) of an MPI process.", false, false), SET_COUNTER_ARRAY(OMPI_SPC_MAX_UNEXPECTED_IN_QUEUE, "The maximum number of messages that the unexpected message queue(s) within an MPI process " - "contained at once since the last reset of this counter. Note: This counter is reset each time it is read."), + "contained at once since the last reset of this counter. Note: This counter is reset each time it is read.", true, false), SET_COUNTER_ARRAY(OMPI_SPC_MAX_OOS_IN_QUEUE, "The maximum number of messages that the out of sequence message queue(s) within an MPI process " - "contained at once since the last reset of this counter. Note: This counter is reset each time it is read.") + "contained at once since the last reset of this counter. 
Note: This counter is reset each time it is read.", true, false) }; -/* An array of integer values to denote whether an event is activated (1) or not (0) */ -static uint32_t ompi_spc_attached_event[OMPI_SPC_NUM_COUNTERS / sizeof(uint32_t)] = { 0 }; -/* An array of integer values to denote whether an event is timer-based (1) or not (0) */ -static uint32_t ompi_spc_timer_event[OMPI_SPC_NUM_COUNTERS / sizeof(uint32_t)] = { 0 }; -/* An array of event structures to store the event data (name and value) */ -static ompi_spc_t *ompi_spc_events = NULL; - -static inline void SET_SPC_BIT(uint32_t* array, int32_t pos) -{ - assert(pos < OMPI_SPC_NUM_COUNTERS); - array[pos / (8 * sizeof(uint32_t))] |= (1U << (pos % (8 * sizeof(uint32_t)))); -} - -static inline bool IS_SPC_BIT_SET(uint32_t* array, int32_t pos) -{ - assert(pos < OMPI_SPC_NUM_COUNTERS); - return !!(array[pos / (8 * sizeof(uint32_t))] & (1U << (pos % (8 * sizeof(uint32_t))))); -} - -static inline void CLEAR_SPC_BIT(uint32_t* array, int32_t pos) -{ - assert(pos < OMPI_SPC_NUM_COUNTERS); - array[pos / (8 * sizeof(uint32_t))] &= ~(1U << (pos % (8 * sizeof(uint32_t)))); -} +/* An array of event structures to store the event data (value, attachments, flags) */ +ompi_spc_t ompi_spc_events[OMPI_SPC_NUM_COUNTERS]; /* ############################################################## * ################# Begin MPI_T Functions ###################### * ############################################################## */ -static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj_handle, int *count) - __opal_attribute_unused__; static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj_handle, int *count) { @@ -197,11 +195,11 @@ static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, v } /* For this event, we need to turn on the counter */ else if(MCA_BASE_PVAR_HANDLE_START == event) { - SET_SPC_BIT(ompi_spc_attached_event, index); + opal_atomic_fetch_add_32(&ompi_spc_events[index].num_attached, 1); } /* For this event, we need to turn off the counter */ else if(MCA_BASE_PVAR_HANDLE_STOP == event) { - CLEAR_SPC_BIT(ompi_spc_attached_event, index); + opal_atomic_fetch_add_32(&ompi_spc_events[index].num_attached, -1); } return MPI_SUCCESS; @@ -217,65 +215,61 @@ static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, v * so we need to convert from MPI_T index to SPC index and then set the 'value' argument * to the correct value for this pvar. 
*/ -static int ompi_spc_get_count(const struct mca_base_pvar_t *pvar, void *value, void *obj_handle) - __opal_attribute_unused__; - static int ompi_spc_get_count(const struct mca_base_pvar_t *pvar, void *value, void *obj_handle) { - long long *counter_value = (long long*)value; + long long *counter_value_ptr = (long long*)value; + long long counter_value; if(OPAL_LIKELY(!mpi_t_enabled)) { - *counter_value = 0; + *counter_value_ptr = 0; return MPI_SUCCESS; } /* Convert from MPI_T pvar index to SPC index */ int index = (int)(uintptr_t)pvar->ctx; /* Set the counter value to the current SPC value */ - *counter_value = (long long)ompi_spc_events[index].value; + counter_value = (long long)ompi_spc_events[index].value; /* If this is a timer-based counter, convert from cycles to microseconds */ - if( IS_SPC_BIT_SET(ompi_spc_timer_event, index) ) { - *counter_value /= sys_clock_freq_mhz; + if( ompi_spc_events[index].is_timer_event ) { + counter_value /= sys_clock_freq_mhz; } /* If this is a high watermark counter, reset it after it has been read */ - if(index == OMPI_SPC_MAX_UNEXPECTED_IN_QUEUE || index == OMPI_SPC_MAX_OOS_IN_QUEUE) { + if(ompi_spc_events[index].is_high_watermark) { ompi_spc_events[index].value = 0; } + *counter_value_ptr = counter_value; + return MPI_SUCCESS; } -/* Initializes the events data structure and allocates memory for it if needed. */ -void ompi_spc_events_init(void) +/* Initializes the statically allocated events data structure. */ +static void ompi_spc_events_init(void) { int i; - /* If the events data structure hasn't been allocated yet, allocate memory for it */ - if(NULL == ompi_spc_events) { - ompi_spc_events = (ompi_spc_t*)malloc(OMPI_SPC_NUM_COUNTERS * sizeof(ompi_spc_t)); - if(ompi_spc_events == NULL) { - opal_show_help("help-mpi-runtime.txt", "lib-call-fail", true, - "malloc", __FILE__, __LINE__); - return; - } - } - /* The data structure has been allocated, so we simply initialize all of the counters - * with their names and an initial count of 0. + /* Initialize all of the counters with an initial count of 0. + * Also copy over the flags for faster access later. */ for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) { - ompi_spc_events[i].name = (char*)ompi_spc_events_names[i].counter_name; ompi_spc_events[i].value = 0; + ompi_spc_events[i].num_attached = 0; + ompi_spc_events[i].is_high_watermark = ompi_spc_events_desc[i].is_high_watermark; + ompi_spc_events[i].is_timer_event = ompi_spc_events_desc[i].is_timer_event; } - ompi_comm_dup(&ompi_mpi_comm_world.comm, &ompi_spc_comm); + if (ompi_mpi_spc_dump_enabled) { + ompi_comm_dup(&ompi_mpi_comm_world.comm, &ompi_spc_comm); + } } -/* Initializes the SPC data structures and registers all counters as MPI_T pvars. - * Turns on only the counters that were specified in the mpi_spc_attach MCA parameter. +/* + * Initializes the SPC events infrastructure. + * Registers all counters requested through the MCA parameter mpi_spc_attach as MPI_T pvars.
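Because each SPC is exported as an MPI_T performance variable, a counter can also be attached and read directly from application or tool code. The sketch below is a minimal example under two assumptions: that the registered pvar name composes to "runtime_spc_OMPI_SPC_SEND" from the framework/component/counter triple passed to mca_base_pvar_register above (confirm the exact string with ompi_info --all), and that the counter class matches the MPI_T_PVAR_CLASS_SIZE used at registration. Alternatively, counters can be enabled for a whole run from the command line, e.g. mpirun --mca mpi_spc_attach all --mca mpi_spc_dump_enabled true ./a.out.

/* spc_read.c - minimal sketch: attach to one SPC counter through MPI_T and print it. */
#include <stdio.h>
#include <mpi.h>

int main(int argc, char **argv)
{
    int provided, idx, count, sbuf = 42, rbuf = 0;
    unsigned long long value = 0;
    MPI_T_pvar_session session;
    MPI_T_pvar_handle handle;
    MPI_Request req;

    MPI_Init(&argc, &argv);
    MPI_T_init_thread(MPI_THREAD_SINGLE, &provided);

    /* Look the counter up by name; the class must match the registration above. */
    if (MPI_SUCCESS == MPI_T_pvar_get_index("runtime_spc_OMPI_SPC_SEND",
                                            MPI_T_PVAR_CLASS_SIZE, &idx)) {
        MPI_T_pvar_session_create(&session);
        MPI_T_pvar_handle_alloc(session, idx, NULL, &handle, &count);
        MPI_T_pvar_start(session, handle);   /* MCA_BASE_PVAR_HANDLE_START -> num_attached++ */

        /* Generate one MPI_Send so the counter has something to show. */
        MPI_Irecv(&rbuf, 1, MPI_INT, 0, 0, MPI_COMM_SELF, &req);
        MPI_Send(&sbuf, 1, MPI_INT, 0, 0, MPI_COMM_SELF);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_T_pvar_read(session, handle, &value);
        printf("OMPI_SPC_SEND = %llu\n", value);

        MPI_T_pvar_stop(session, handle);
        MPI_T_pvar_handle_free(session, &handle);
        MPI_T_pvar_session_free(&session);
    }

    MPI_T_finalize();
    MPI_Finalize();
    return 0;
}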
*/ void ompi_spc_init(void) { - int i, j, ret, found = 0, all_on = 0, matched = 0; + int i, j, ret, all_on = 0, matched = 0; /* Initialize the clock frequency variable as the CPU's frequency in MHz */ sys_clock_freq_mhz = opal_timer_base_get_freq() / 1000000; @@ -295,15 +289,16 @@ void ompi_spc_init(void) } } + /* enable mpi_t and only disable if something goes wrong */ + mpi_t_enabled = true; + for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) { - /* Reset all timer-based counters */ - CLEAR_SPC_BIT(ompi_spc_timer_event, i); matched = all_on; if( !matched ) { /* Turn on only the counters that were specified in the MCA parameter */ for(j = 0; j < num_args; j++) { - if( 0 == strcmp(ompi_spc_events_names[i].counter_name, arg_strings[j]) ) { + if( 0 == strcmp(ompi_spc_events_desc[i].counter_name, arg_strings[j]) ) { matched = 1; break; } @@ -311,16 +306,14 @@ void ompi_spc_init(void) } if (matched) { - SET_SPC_BIT(ompi_spc_attached_event, i); - mpi_t_enabled = true; - found++; + opal_atomic_fetch_add_32(&ompi_spc_events[i].num_attached, 1); } /* Registers the current counter as an MPI_T pvar regardless of whether it's been turned on or not */ - ret = mca_base_pvar_register("ompi", "runtime", "spc", ompi_spc_events_names[i].counter_name, ompi_spc_events_names[i].counter_description, + ret = mca_base_pvar_register("ompi", "runtime", "spc", ompi_spc_events_desc[i].counter_name, ompi_spc_events_desc[i].counter_description, OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT, - MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, + MCA_BASE_PVAR_FLAG_READONLY, ompi_spc_get_count, NULL, ompi_spc_notify, (void*)(uintptr_t)i); if( ret < 0 ) { mpi_t_enabled = false; @@ -329,9 +322,6 @@ void ompi_spc_init(void) } } - /* If this is a timer event, set the corresponding timer_event entry */ - SET_SPC_BIT(ompi_spc_timer_event, OMPI_SPC_MATCH_TIME); - opal_argv_free(arg_strings); } @@ -348,8 +338,8 @@ static void ompi_spc_dump(void) /* Convert from cycles to usecs before sending */ for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) { - if( IS_SPC_BIT_SET(ompi_spc_timer_event, i) ) { - SPC_CYCLES_TO_USECS(&ompi_spc_events[i].value); + if( ompi_spc_events[i].is_timer_event ) { + ompi_spc_events[i].value = ompi_spc_cycles_to_usecs_internal(ompi_spc_events[i].value); } } @@ -383,11 +373,10 @@ static void ompi_spc_dump(void) for(j = 0; j < world_size; j++) { opal_output(0, "MPI_COMM_WORLD Rank %d:\n", j); for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) { - /* If this is a timer-based counter, we need to covert from cycles to usecs */ if( 0 == recv_buffer[offset+i] ) { continue; } - opal_output(0, "%s -> %lld\n", ompi_spc_events[i].name, recv_buffer[offset+i]); + opal_output(0, "%s -> %lld\n", ompi_spc_events_desc[i].counter_name, recv_buffer[offset+i]); } opal_output(0, "\n"); offset += OMPI_SPC_NUM_COUNTERS; @@ -406,79 +395,26 @@ static void ompi_spc_dump(void) /* Frees any dynamically alocated OMPI SPC data structures */ void ompi_spc_fini(void) { - if (SPC_ENABLE == 1 && ompi_mpi_spc_dump_enabled) { + if (ompi_mpi_spc_dump_enabled) { ompi_spc_dump(); + ompi_comm_free(&ompi_spc_comm); } - - free(ompi_spc_events); ompi_spc_events = NULL; - ompi_comm_free(&ompi_spc_comm); } -/* Records an update to a counter using an atomic add operation. */ -void ompi_spc_record(unsigned int event_id, ompi_spc_value_t value) -{ - /* Denoted unlikely because counters will often be turned off. 
*/ - if( OPAL_UNLIKELY(IS_SPC_BIT_SET(ompi_spc_attached_event, event_id)) ) { - OPAL_THREAD_ADD_FETCH_SIZE_T(&(ompi_spc_events[event_id].value), value); - } -} - -/* Starts cycle-precision timer and stores the start value in the 'cycles' argument. - * Note: This assumes that the 'cycles' argument is initialized to 0 if the timer - * hasn't been started yet. - */ -void ompi_spc_timer_start(unsigned int event_id, opal_timer_t *cycles) -{ - /* Check whether cycles == 0.0 to make sure the timer hasn't started yet. - * This is denoted unlikely because the counters will often be turned off. - */ - if( OPAL_UNLIKELY(IS_SPC_BIT_SET(ompi_spc_attached_event, event_id) && *cycles == 0) ) { - *cycles = opal_timer_base_get_cycles(); - } -} - -/* Stops a cycle-precision timer and calculates the total elapsed time - * based on the starting time in 'cycles' and stores the result in the - * 'cycles' argument. - */ -void ompi_spc_timer_stop(unsigned int event_id, opal_timer_t *cycles) -{ - /* This is denoted unlikely because the counters will often be turned off. */ - if( OPAL_UNLIKELY(IS_SPC_BIT_SET(ompi_spc_attached_event, event_id)) ) { - *cycles = opal_timer_base_get_cycles() - *cycles; - OPAL_THREAD_ADD_FETCH_SIZE_T(&ompi_spc_events[event_id].value, (size_t) *cycles); - } -} - -/* Checks a tag, and records the user version of the counter if it's greater - * than or equal to 0 and records the mpi version of the counter otherwise. - */ -void ompi_spc_user_or_mpi(int tag, ompi_spc_value_t value, unsigned int user_enum, unsigned int mpi_enum) -{ - SPC_RECORD( (tag >= 0 ? user_enum : mpi_enum), value); -} - -/* Checks whether the counter denoted by value_enum exceeds the current value of the - * counter denoted by watermark_enum, and if so sets the watermark_enum counter to the - * value of the value_enum counter. +/* Converts a counter value that is in cycles to microseconds. + * Internal helper function that can be inlined. */ -void ompi_spc_update_watermark(unsigned int watermark_enum, unsigned int value_enum) +static inline +ompi_spc_value_t ompi_spc_cycles_to_usecs_internal(opal_timer_t cycles) { - /* Denoted unlikely because counters will often be turned off. */ - if( OPAL_UNLIKELY(IS_SPC_BIT_SET(ompi_spc_attached_event, watermark_enum) && - IS_SPC_BIT_SET(ompi_spc_attached_event, value_enum)) ) { - /* WARNING: This assumes that this function was called while a lock has already been taken. - * This function is NOT thread safe otherwise! - */ - if(ompi_spc_events[value_enum].value > ompi_spc_events[watermark_enum].value) { - ompi_spc_events[watermark_enum].value = ompi_spc_events[value_enum].value; - } - } + return (cycles / sys_clock_freq_mhz); } /* Converts a counter value that is in cycles to microseconds. */ -void ompi_spc_cycles_to_usecs(ompi_spc_value_t *cycles) +void ompi_spc_cycles_to_usecs(opal_timer_t *cycles) { - *cycles = *cycles / sys_clock_freq_mhz; + *cycles = ompi_spc_cycles_to_usecs_internal(*cycles); } + +#endif // SPC_ENABLE diff --git a/ompi/runtime/ompi_spc.h b/ompi/runtime/ompi_spc.h index 5d040511c34..374c50a5334 100644 --- a/ompi/runtime/ompi_spc.h +++ b/ompi/runtime/ompi_spc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 The University of Tennessee and The University + * Copyright (c) 2018-2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2018 Research Organization for Information Science @@ -15,19 +15,11 @@ #ifndef OMPI_SPC #define OMPI_SPC -#include -#include -#include -#include +#include "ompi_config.h" -#include "ompi/communicator/communicator.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/runtime/params.h" -#include "opal/mca/timer/timer.h" -#include "opal/mca/base/mca_base_pvar.h" -#include "opal/util/argv.h" -#include "opal/util/show_help.h" -#include "opal/util/output.h" +#include "opal/sys/atomic.h" +#include "opal/include/opal/prefetch.h" +#include "opal/mca/threads/thread_usage.h" #include MCA_timer_IMPLEMENTATION_HEADER @@ -166,31 +158,31 @@ typedef enum ompi_spc_counters { /* There is currently no support for atomics on long long values so we will default to * size_t for now until support for such atomics is implemented. */ -typedef opal_atomic_size_t ompi_spc_value_t; +typedef long long ompi_spc_value_t; /* A structure for storing the event data */ typedef struct ompi_spc_s{ - char *name; - ompi_spc_value_t value; + opal_atomic_int64_t value; + opal_atomic_int32_t num_attached; + bool is_high_watermark; + bool is_timer_event; } ompi_spc_t; -/* Events data structure initialization function */ -void ompi_spc_events_init(void); +/* Definitions for using the SPC utility functions throughout the codebase. + * If SPC_ENABLE is not 1, the macros become no-ops. + */ +#if SPC_ENABLE == 1 /* OMPI SPC utility functions */ void ompi_spc_init(void); void ompi_spc_fini(void); -void ompi_spc_record(unsigned int event_id, ompi_spc_value_t value); -void ompi_spc_timer_start(unsigned int event_id, opal_timer_t *cycles); -void ompi_spc_timer_stop(unsigned int event_id, opal_timer_t *cycles); -void ompi_spc_user_or_mpi(int tag, ompi_spc_value_t value, unsigned int user_enum, unsigned int mpi_enum); -void ompi_spc_cycles_to_usecs(ompi_spc_value_t *cycles); -void ompi_spc_update_watermark(unsigned int watermark_enum, unsigned int value_enum); - -/* Macros for using the SPC utility functions throughout the codebase. - * If SPC_ENABLE is not 1, the macros become no-ops. +void ompi_spc_cycles_to_usecs(opal_timer_t *cycles); + +/* An array of event structures to store the event data (value, attachments, flags) + * The memory is statically allocated to reduce the number of loads required. */ -#if SPC_ENABLE == 1 +OPAL_DECLSPEC extern +ompi_spc_t ompi_spc_events[OMPI_SPC_NUM_COUNTERS] __opal_attribute_aligned__(sizeof(ompi_spc_t)); #define SPC_INIT() \ ompi_spc_init() @@ -216,6 +208,79 @@ void ompi_spc_update_watermark(unsigned int watermark_enum, unsigned int value_e #define SPC_UPDATE_WATERMARK(watermark_enum, value_enum) \ ompi_spc_update_watermark(watermark_enum, value_enum) + +/* Records an update to a counter using an atomic add operation. */ +static inline +void ompi_spc_record(unsigned int event_id, ompi_spc_value_t value) +{ + /* Denoted unlikely because counters will often be turned off. */ + if( ompi_spc_events[event_id].num_attached > 0 ) { + OPAL_THREAD_ADD_FETCH64(&(ompi_spc_events[event_id].value), value); + } +} + +/* Checks a tag, and records the user version of the counter if it's greater + * than or equal to 0 and records the mpi version of the counter otherwise. + */ +static inline +void ompi_spc_user_or_mpi(int tag, ompi_spc_value_t value, unsigned int user_enum, unsigned int mpi_enum) +{ + ompi_spc_record( (tag >= 0 ?
user_enum : mpi_enum), value); +} + +/* Checks whether the counter denoted by value_enum exceeds the current value of the + * counter denoted by watermark_enum, and if so sets the watermark_enum counter to the + * value of the value_enum counter. + */ +static inline +void ompi_spc_update_watermark(unsigned int watermark_enum, unsigned int value_enum) +{ + ompi_spc_t *watermark_event = &ompi_spc_events[watermark_enum]; + ompi_spc_t *value_event = &ompi_spc_events[value_enum]; + /* Counters will often be turned off. */ + if( watermark_event->num_attached && + value_event->num_attached ) { + int64_t watermark = watermark_event->value; + int64_t value = value_event->value; + /* Try to atomically replace the watermark while the value is larger + * (i.e., while no thread has replaced it with a larger value, including this thread) */ + while (value > watermark && + !OPAL_THREAD_COMPARE_EXCHANGE_STRONG_64(&watermark_event->value, + &watermark, value)) + { } + } +} + +/* + * Starts cycle-precision timer and stores the start value in the 'cycles' argument. + * The value is always stored in 'cycles' to avoid race conditions with other threads + * activating a previously inactive timer counter in between start and stop. + */ +static inline +void ompi_spc_timer_start(unsigned int event_id, opal_timer_t *cycles) +{ + *cycles = 0; + + if( (ompi_spc_events[event_id].num_attached > 0) ) { + *cycles = opal_timer_base_get_cycles(); + } +} + +/* Stops a cycle-precision timer and calculates the total elapsed time + * based on the starting time in 'cycles' and stores the result in the + * 'cycles' argument. + */ +static inline +void ompi_spc_timer_stop(unsigned int event_id, opal_timer_t *cycles) +{ + if( ompi_spc_events[event_id].num_attached > 0 && *cycles > 0 ) { + *cycles = opal_timer_base_get_cycles() - *cycles; + OPAL_THREAD_ADD_FETCH64(&ompi_spc_events[event_id].value, *cycles); + } +} + + #else /* SPCs are not enabled */ #define SPC_INIT() \ diff --git a/opal/mca/btl/ofi/btl_ofi_atomics.c b/opal/mca/btl/ofi/btl_ofi_atomics.c index 34fa9cc4776..f9b2447130b 100644 --- a/opal/mca/btl/ofi/btl_ofi_atomics.c +++ b/opal/mca/btl/ofi/btl_ofi_atomics.c @@ -73,8 +73,10 @@ int mca_btl_ofi_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end fi_datatype, fi_op, &comp->comp_ctx); if (rc == -FI_EAGAIN) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); return OPAL_ERR_OUT_OF_RESOURCE; } else if (rc < 0) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); BTL_ERROR(("fi_fetch_atomic failed with rc=%d (%s)", rc, fi_strerror(-rc))); MCA_BTL_OFI_ABORT(); } @@ -125,8 +127,10 @@ int mca_btl_ofi_aop (struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t fi_datatype, fi_op, &comp->comp_ctx); if (rc == -FI_EAGAIN) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); return OPAL_ERR_OUT_OF_RESOURCE; } else if (rc < 0) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); BTL_ERROR(("fi_atomic failed with rc=%d (%s)", rc, fi_strerror(-rc))); MCA_BTL_OFI_ABORT(); } @@ -181,8 +185,10 @@ int mca_btl_ofi_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_e &comp->comp_ctx); if (rc == -FI_EAGAIN) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); return OPAL_ERR_OUT_OF_RESOURCE; } else if (rc < 0) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); BTL_ERROR(("fi_compare_atomic failed with rc=%d
(%s)", rc, fi_strerror(-rc))); MCA_BTL_OFI_ABORT(); } diff --git a/opal/mca/btl/ofi/btl_ofi_rdma.c b/opal/mca/btl/ofi/btl_ofi_rdma.c index 0ecbd887bc1..f2728e36dc0 100644 --- a/opal/mca/btl/ofi/btl_ofi_rdma.c +++ b/opal/mca/btl/ofi/btl_ofi_rdma.c @@ -88,10 +88,12 @@ int mca_btl_ofi_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi &comp->comp_ctx); /* completion context */ if (-FI_EAGAIN == rc) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); return OPAL_ERR_OUT_OF_RESOURCE; } if (0 != rc) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); BTL_ERROR(("fi_read failed with %d:%s", rc, fi_strerror(-rc))); MCA_BTL_OFI_ABORT(); } @@ -133,10 +135,12 @@ int mca_btl_ofi_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi &comp->comp_ctx); /* completion context */ if (-FI_EAGAIN == rc) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); return OPAL_ERR_OUT_OF_RESOURCE; } if (0 != rc) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); BTL_ERROR(("fi_write failed with %d:%s", rc, fi_strerror(-rc))); MCA_BTL_OFI_ABORT(); } diff --git a/opal/mca/common/ofi/common_ofi.c b/opal/mca/common/ofi/common_ofi.c index 97140b30dbd..593843b87d4 100644 --- a/opal/mca/common/ofi/common_ofi.c +++ b/opal/mca/common/ofi/common_ofi.c @@ -340,7 +340,10 @@ static uint32_t get_package_rank(opal_process_info_t *process_info) &pname, &locality_string, PMIX_STRING); if (PMIX_SUCCESS != rc || NULL == locality_string) { // If we don't have information about locality, fall back to procid - opal_show_help("help-common-ofi.txt", "package_rank failed", true); + opal_output_verbose(1, opal_common_ofi.output, + "%s:%d:Unable to get locality string from local peers.\n" + "This may negatively impact performance.\n", + __FILE__, __LINE__); return (uint32_t)process_info->myprocid.rank; } diff --git a/opal/mca/threads/pthreads/configure.m4 b/opal/mca/threads/pthreads/configure.m4 index 9919db49632..35b8766c8e7 100644 --- a/opal/mca/threads/pthreads/configure.m4 +++ b/opal/mca/threads/pthreads/configure.m4 @@ -52,9 +52,9 @@ CFLAGS="$CFLAGS $THREAD_CFLAGS" CPPFLAGS_save="$CPPFLAGS" CPPFLAGS="$CPPFLAGS $THREAD_CPPFLAGS" LDFLAGS_save="$LDFLAGS" -LDFLAGS="$LDFLAGS $THREAD_LDFLAGS" +LDFLAGS="$THREAD_LDFLAGS" LIBS_save="$LIBS" -LIBS="$LIBS $THREAD_LIBS" +LIBS="$THREAD_LIBS" AC_RUN_IFELSE([AC_LANG_SOURCE([#include #include #include diff --git a/test/datatype/reduce_local.c b/test/datatype/reduce_local.c index 97890f94227..f227439b714 100644 --- a/test/datatype/reduce_local.c +++ b/test/datatype/reduce_local.c @@ -59,7 +59,7 @@ static int total_errors = 0; _a < _b ? 
_a : _b; }) static void print_status(char* op, char* type, int type_size, - int count, double duration, + int count, int max_shift, double *duration, int repeats, int correct ) { if(correct) { @@ -68,7 +68,15 @@ static void print_status(char* op, char* type, int type_size, printf("%-10s %s [\033[1;31mfail\033[0m]", op, type); total_errors++; } - printf(" count %-10d time %.6f seconds\n", count, duration); + if( 1 == max_shift ) { + printf(" count %-10d time (seconds) %.8f seconds\n", count, duration[0] / repeats); + } else { + printf(" count %-10d time (seconds / shifts) ", count); + for( int i = 0; i < max_shift; i++ ) { + printf("%.8f ", duration[i] / repeats ); + } + printf("\n"); + } } static int do_ops_built = 0; @@ -115,19 +123,23 @@ do { \ const TYPE *_p1 = ((TYPE*)(INBUF)), *_p3 = ((TYPE*)(CHECK_BUF)); \ TYPE *_p2 = ((TYPE*)(INOUT_BUF)); \ skip_op_type = 0; \ - for(int _k = 0; _k < min((COUNT), 4); +_k++ ) { \ - memcpy(_p2, _p3, sizeof(TYPE) * (COUNT)); \ - tstart = MPI_Wtime(); \ - MPI_Reduce_local(_p1+_k, _p2+_k, (COUNT)-_k, (MPITYPE), (MPIOP)); \ - tend = MPI_Wtime(); \ - if( check ) { \ - for( i = 0; i < (COUNT)-_k; i++ ) { \ - if(((_p2+_k)[i]) == (((_p1+_k)[i]) OPNAME ((_p3+_k)[i]))) \ - continue; \ - printf("First error at alignment %d position %d (%" TYPE_PREFIX " %s %" TYPE_PREFIX " != %" TYPE_PREFIX ")\n", \ - _k, i, (_p1+_k)[i], (#OPNAME), (_p3+_k)[i], (_p2+_k)[i]); \ - correctness = 0; \ - break; \ + for(int _k = 0; _k < min((COUNT), max_shift); +_k++ ) { \ + duration[_k] = 0.0; \ + for(int _r = repeats; _r > 0; _r--) { \ + memcpy(_p2, _p3, sizeof(TYPE) * (COUNT)); \ + tstart = MPI_Wtime(); \ + MPI_Reduce_local(_p1+_k, _p2+_k, (COUNT)-_k, (MPITYPE), (MPIOP)); \ + tend = MPI_Wtime(); \ + duration[_k] += (tend - tstart); \ + if( check ) { \ + for( i = 0; i < (COUNT)-_k; i++ ) { \ + if(((_p2+_k)[i]) == (((_p1+_k)[i]) OPNAME ((_p3+_k)[i]))) \ + continue; \ + printf("First error at alignment %d position %d (%" TYPE_PREFIX " %s %" TYPE_PREFIX " != %" TYPE_PREFIX ")\n", \ + _k, i, (_p1+_k)[i], (#OPNAME), (_p3+_k)[i], (_p2+_k)[i]); \ + correctness = 0; \ + break; \ + } \ } \ } \ } \ @@ -139,20 +151,24 @@ do { \ const TYPE *_p1 = ((TYPE*)(INBUF)), *_p3 = ((TYPE*)(CHECK_BUF)); \ TYPE *_p2 = ((TYPE*)(INOUT_BUF)); \ skip_op_type = 0; \ - for(int _k = 0; _k < min((COUNT), 4); +_k++ ) { \ - memcpy(_p2, _p3, sizeof(TYPE) * (COUNT)); \ - tstart = MPI_Wtime(); \ - MPI_Reduce_local(_p1+_k, _p2+_k, (COUNT), (MPITYPE), (MPIOP)); \ - tend = MPI_Wtime(); \ - if( check ) { \ - for( i = 0; i < (COUNT); i++ ) { \ - TYPE _v1 = *(_p1+_k), _v2 = *(_p2+_k), _v3 = *(_p3+_k); \ - if(_v2 == OPNAME(_v1, _v3)) \ - continue; \ - printf("First error at alignment %d position %d (%" TYPE_PREFIX " != %s(%" TYPE_PREFIX ", %" TYPE_PREFIX ")\n", \ - _k, i, _v1, (#OPNAME), _v3, _v2); \ - correctness = 0; \ - break; \ + for(int _k = 0; _k < min((COUNT), max_shift); +_k++ ) { \ + duration[_k] = 0.0; \ + for(int _r = repeats; _r > 0; _r--) { \ + memcpy(_p2, _p3, sizeof(TYPE) * (COUNT)); \ + tstart = MPI_Wtime(); \ + MPI_Reduce_local(_p1+_k, _p2+_k, (COUNT), (MPITYPE), (MPIOP)); \ + tend = MPI_Wtime(); \ + duration[_k] += (tend - tstart); \ + if( check ) { \ + for( i = 0; i < (COUNT); i++ ) { \ + TYPE _v1 = *(_p1+_k), _v2 = *(_p2+_k), _v3 = *(_p3+_k); \ + if(_v2 == OPNAME(_v1, _v3)) \ + continue; \ + printf("First error at alignment %d position %d (%" TYPE_PREFIX " != %s(%" TYPE_PREFIX ", %" TYPE_PREFIX ")\n", \ + _k, i, _v1, (#OPNAME), _v3, _v2); \ + correctness = 0; \ + break; \ + } \ } \ } \ } \ @@ -163,24 +179,36 
@@ int main(int argc, char **argv) { static void *in_buf = NULL, *inout_buf = NULL, *inout_check_buf = NULL; int count, type_size = 8, rank, size, provided, correctness = 1; - int repeats = 1, i, c; - double tstart, tend; + int repeats = 1, i, c, op1_alignment = 0, res_alignment = 0; + int max_shift = 4; + double *duration, tstart, tend; bool check = true; char type[5] = "uifd", *op = "sum", *mpi_type; int lower = 1, upper = 1000000, skip_op_type; MPI_Op mpi_op; - while( -1 != (c = getopt(argc, argv, "l:u:t:o:s:n:vfh")) ) { + while( -1 != (c = getopt(argc, argv, "l:u:r:t:o:i:s:n:1:2:vfh")) ) { switch(c) { case 'l': lower = atoi(optarg); if( lower <= 0 ) { - fprintf(stderr, "The number of elements must be positive\n"); + fprintf(stderr, "The lower number of elements must be positive\n"); exit(-1); } break; case 'u': upper = atoi(optarg); + if( upper <= 0 ) { + fprintf(stderr, "The upper number of elements must be positive\n"); + exit(-1); + } + break; + case 'i': + max_shift = atoi(optarg); + if( max_shift <= 0 ) { + fprintf(stderr, "The max shift must be positive\n"); + exit(-1); + } break; case 'f': check = false; @@ -216,14 +244,32 @@ int main(int argc, char **argv) exit(-1); } break; + case '1': + op1_alignment = atoi(optarg); + if( op1_alignment < 0 ) { + fprintf(stderr, "alignment for the first operand must be non-negative\n"); + exit(-1); + } + break; + case '2': + res_alignment = atoi(optarg); + if( res_alignment < 0 ) { + fprintf(stderr, "alignment for the result must be non-negative\n"); + exit(-1); + } + break; case 'h': fprintf(stdout, "%s options are:\n" " -l : lower number of elements\n" " -u : upper number of elements\n" " -s : 8, 16, 32 or 64 bits elements\n" " -t [i,u,f,d] : type of the elements to apply the operations on\n" + " -r : number of repetitions for each test\n" " -o : comma separated list of operations to execute among\n" " sum, min, max, prod, bor, bxor, band\n" + " -i : shift on all buffers to check alignment\n" + " -1 : (mis)alignment in elements for the first op\n" + " -2 : (mis)alignment in elements for the result\n" " -v: increase the verbosity level\n" " -h: this help message\n", argv[0]); exit(0); @@ -233,9 +279,10 @@ int main(int argc, char **argv) if( !do_ops_built ) { /* not yet done, take the default */ build_do_ops( "all", do_ops); } - in_buf = malloc(upper * sizeof(double)); - inout_buf = malloc(upper * sizeof(double)); - inout_check_buf = malloc(upper * sizeof(double)); + posix_memalign( &in_buf, 64, (upper + op1_alignment) * sizeof(double)); + posix_memalign( &inout_buf, 64, (upper + res_alignment) * sizeof(double)); + posix_memalign( &inout_check_buf, 64, upper * sizeof(double)); + duration = (double*)malloc(max_shift * sizeof(double)); ompi_mpi_init(argc, argv, MPI_THREAD_SERIALIZED, &provided, false); @@ -253,8 +300,8 @@ int main(int argc, char **argv) correctness = 1; if('i' == type[type_idx]) { if( 8 == type_size ) { - int8_t *in_int8 = (int8_t*)in_buf, - *inout_int8 = (int8_t*)inout_buf, + int8_t *in_int8 = (int8_t*)((char*)in_buf + op1_alignment * sizeof(int8_t)), + *inout_int8 = (int8_t*)((char*)inout_buf + res_alignment * sizeof(int8_t)), *inout_int8_for_check = (int8_t*)inout_check_buf; for( i = 0; i < count; i++ ) { in_int8[i] = 5; @@ -299,8 +346,8 @@ int main(int argc, char **argv) } } if( 16 == type_size ) { - int16_t *in_int16 = (int16_t*)in_buf, - *inout_int16 = (int16_t*)inout_buf, + int16_t *in_int16 = (int16_t*)((char*)in_buf + op1_alignment * sizeof(int16_t)), + *inout_int16 = (int16_t*)((char*)inout_buf + res_alignment *
sizeof(int16_t)), *inout_int16_for_check = (int16_t*)inout_check_buf; for( i = 0; i < count; i++ ) { in_int16[i] = 5; @@ -345,8 +392,8 @@ int main(int argc, char **argv) } } if( 32 == type_size ) { - int32_t *in_int32 = (int32_t*)in_buf, - *inout_int32 = (int32_t*)inout_buf, + int32_t *in_int32 = (int32_t*)((char*)in_buf + op1_alignment * sizeof(int32_t)), + *inout_int32 = (int32_t*)((char*)inout_buf + res_alignment * sizeof(int32_t)), *inout_int32_for_check = (int32_t*)inout_check_buf; for( i = 0; i < count; i++ ) { in_int32[i] = 5; @@ -391,8 +438,8 @@ int main(int argc, char **argv) } } if( 64 == type_size ) { - int64_t *in_int64 = (int64_t*)in_buf, - *inout_int64 = (int64_t*)inout_buf, + int64_t *in_int64 = (int64_t*)((char*)in_buf + op1_alignment * sizeof(int64_t)), + *inout_int64 = (int64_t*)((char*)inout_buf + res_alignment * sizeof(int64_t)), *inout_int64_for_check = (int64_t*)inout_check_buf; for( i = 0; i < count; i++ ) { in_int64[i] = 5; @@ -440,8 +487,8 @@ int main(int argc, char **argv) if( 'u' == type[type_idx] ) { if( 8 == type_size ) { - uint8_t *in_uint8 = (uint8_t*)in_buf, - *inout_uint8 = (uint8_t*)inout_buf, + uint8_t *in_uint8 = (uint8_t*)((char*)in_buf + op1_alignment * sizeof(uint8_t)), + *inout_uint8 = (uint8_t*)((char*)inout_buf + res_alignment * sizeof(uint8_t)), *inout_uint8_for_check = (uint8_t*)inout_check_buf; for( i = 0; i < count; i++ ) { in_uint8[i] = 5; @@ -486,8 +533,8 @@ int main(int argc, char **argv) } } if( 16 == type_size ) { - uint16_t *in_uint16 = (uint16_t*)in_buf, - *inout_uint16 = (uint16_t*)inout_buf, + uint16_t *in_uint16 = (uint16_t*)((char*)in_buf + op1_alignment * sizeof(uint16_t)), + *inout_uint16 = (uint16_t*)((char*)inout_buf + res_alignment * sizeof(uint16_t)), *inout_uint16_for_check = (uint16_t*)inout_check_buf; for( i = 0; i < count; i++ ) { in_uint16[i] = 5; @@ -532,8 +579,8 @@ int main(int argc, char **argv) } } if( 32 == type_size ) { - uint32_t *in_uint32 = (uint32_t*)in_buf, - *inout_uint32 = (uint32_t*)inout_buf, + uint32_t *in_uint32 = (uint32_t*)((char*)in_buf + op1_alignment * sizeof(uint32_t)), + *inout_uint32 = (uint32_t*)((char*)inout_buf + res_alignment * sizeof(uint32_t)), *inout_uint32_for_check = (uint32_t*)inout_check_buf; for( i = 0; i < count; i++ ) { in_uint32[i] = 5; @@ -578,8 +625,8 @@ int main(int argc, char **argv) } } if( 64 == type_size ) { - uint64_t *in_uint64 = (uint64_t*)in_buf, - *inout_uint64 = (uint64_t*)inout_buf, + uint64_t *in_uint64 = (uint64_t*)((char*)in_buf + op1_alignment * sizeof(uint64_t)), + *inout_uint64 = (uint64_t*)((char*)inout_buf + res_alignment * sizeof(uint64_t)), *inout_uint64_for_check = (uint64_t*)inout_check_buf; for( i = 0; i < count; i++ ) { in_uint64[i] = 5; @@ -626,8 +673,8 @@ int main(int argc, char **argv) } if( 'f' == type[type_idx] ) { - float *in_float = (float*)in_buf, - *inout_float = (float*)inout_buf, + float *in_float = (float*)((char*)in_buf + op1_alignment * sizeof(float)), + *inout_float = (float*)((char*)inout_buf + res_alignment * sizeof(float)), *inout_float_for_check = (float*)inout_check_buf; for( i = 0; i < count; i++ ) { in_float[i] = 1000.0+1; @@ -658,8 +705,8 @@ int main(int argc, char **argv) } if( 'd' == type[type_idx] ) { - double *in_double = (double*)in_buf, - *inout_double = (double*)inout_buf, + double *in_double = (double*)((char*)in_buf + op1_alignment * sizeof(double)), + *inout_double = (double*)((char*)inout_buf + res_alignment * sizeof(double)), *inout_double_for_check = (double*)inout_check_buf; for( i = 0; i < count; i++ ) { in_double[i] = 
10.0+1; @@ -691,7 +738,7 @@ int main(int argc, char **argv) check_and_continue: if( !skip_op_type ) print_status(array_of_ops[do_ops[op_idx]].mpi_op_name, - mpi_type, type_size, count, tend-tstart, correctness); + mpi_type, type_size, count, max_shift, duration, repeats, correctness); } if( !skip_op_type ) printf("\n"); diff --git a/test/spc/spc_test.c b/test/spc/spc_test.c index 57b593fd934..290b58eedae 100644 --- a/test/spc/spc_test.c +++ b/test/spc/spc_test.c @@ -74,6 +74,10 @@ int main(int argc, char **argv) MPI_result = MPI_T_pvar_get_info(i, name, &name_len, &verbosity, &var_class, &datatype, &enumtype, description, &desc_len, &bind, &readonly, &continuous, &atomic); + if (MPI_result == MPI_T_ERR_INVALID) { + // skip invalidated MPI_T pvars + continue; + } if(MPI_result != MPI_SUCCESS || MPI_result == MPI_T_ERR_PVAR_NO_STARTSTOP) { fprintf(stderr, "Failed to get pvar info.\n"); MPI_Abort(MPI_COMM_WORLD, MPI_result); @@ -82,6 +86,7 @@ int main(int argc, char **argv) if(strcmp(name, counter_names[rank]) == 0) { index = i; printf("[%d] %s -> %s\n", rank, name, description); + break; } }
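
Reviewer note on the lock-free watermark update added to ompi/runtime/ompi_spc.h: OPAL_THREAD_COMPARE_EXCHANGE_STRONG_64 reloads the expected value on failure, so the retry loop keeps going only while the new sample is still larger than whatever another thread has already published. The standalone sketch below shows the same pattern; it is illustrative only, uses C11 <stdatomic.h> instead of the OPAL atomics, and the names (update_watermark, watermark) are invented for the example.

/* Minimal sketch, assuming C11 atomics; not Open MPI code. */
#include <stdatomic.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

static _Atomic int64_t watermark = 0;

/* Raise 'watermark' to 'value' unless another thread already stored
 * something at least as large. On failure, compare_exchange refreshes
 * 'expected' with the current watermark, so each retry re-tests the
 * 'value > expected' condition against the latest published value. */
static void update_watermark(int64_t value)
{
    int64_t expected = atomic_load(&watermark);
    while (value > expected &&
           !atomic_compare_exchange_strong(&watermark, &expected, value)) {
        /* retry with the refreshed 'expected' */
    }
}

int main(void)
{
    update_watermark(42);
    update_watermark(17);   /* no effect: 17 <= 42 */
    printf("watermark = %" PRId64 "\n", atomic_load(&watermark));
    return 0;
}

A weak compare-exchange would also be correct here (a spurious failure simply refreshes the expected value and retries); the strong variant just avoids needless extra iterations.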
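Note on the reduce_local.c changes: the test buffers are now 64-byte aligned via posix_memalign and then deliberately offset by -1/-2 elements' worth of bytes, so MPI_Reduce_local can be timed on misaligned operands; the (upper + op1_alignment) padding keeps the shifted view inside the allocation. A minimal sketch of that pointer arithmetic, with invented names and independent of the test itself:

/* Illustrative sketch only; 'base', 'view', and 'shift' are made up for the example. */
#define _POSIX_C_SOURCE 200112L
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    void *base = NULL;
    size_t count = 1024, shift = 3;   /* analogous to "-1 3" / "-2 3" in the test */

    /* 64-byte aligned base, padded so the shifted view still holds 'count' elements */
    if (0 != posix_memalign(&base, 64, (count + shift) * sizeof(int32_t))) {
        fprintf(stderr, "posix_memalign failed\n");
        return 1;
    }

    /* Offset by whole elements: 'view' keeps int32_t alignment but loses the
     * 64-byte (cache-line / vector-width) alignment of 'base'. */
    int32_t *view = (int32_t *)((char *)base + shift * sizeof(int32_t));
    printf("base %% 64 = %u, view %% 64 = %u\n",
           (unsigned)((uintptr_t)base % 64), (unsigned)((uintptr_t)view % 64));

    free(base);
    return 0;
}

One small follow-up worth considering in the test itself: the posix_memalign return codes are currently ignored, so an allocation failure would surface later as a crash rather than an error message.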